wine-staging/patches/vkd3d-latest/0001-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch
2025-01-23 08:14:29 +11:00

From 419cefedf7c0b02c10a86894d4348ce300b34518 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Fri, 29 Nov 2024 07:14:57 +1100
Subject: [PATCH] Updated vkd3d to 5b2d62e59a6365e32aac3fa37fe16ab3582deae4.
---
libs/vkd3d/include/private/vkd3d_common.h | 2 +-
libs/vkd3d/include/vkd3d.h | 29 +-
libs/vkd3d/include/vkd3d_shader.h | 219 +
libs/vkd3d/libs/vkd3d-common/blob.c | 1 +
libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 339 +-
libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1118 ++---
libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 +
libs/vkd3d/libs/vkd3d-shader/dxil.c | 87 +-
libs/vkd3d/libs/vkd3d-shader/fx.c | 175 +-
libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 478 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 187 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 879 +++-
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 4112 ++++++++++++++---
.../libs/vkd3d-shader/hlsl_constant_ops.c | 363 +-
libs/vkd3d/libs/vkd3d-shader/ir.c | 2094 +++++++--
libs/vkd3d/libs/vkd3d-shader/msl.c | 464 +-
libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 +
libs/vkd3d/libs/vkd3d-shader/spirv.c | 761 +--
libs/vkd3d/libs/vkd3d-shader/tpf.c | 2251 +--------
.../libs/vkd3d-shader/vkd3d_shader_main.c | 34 +-
.../libs/vkd3d-shader/vkd3d_shader_private.h | 86 +-
libs/vkd3d/libs/vkd3d/command.c | 330 +-
libs/vkd3d/libs/vkd3d/device.c | 93 +-
libs/vkd3d/libs/vkd3d/resource.c | 9 +-
libs/vkd3d/libs/vkd3d/state.c | 242 +-
libs/vkd3d/libs/vkd3d/utils.c | 2 +-
libs/vkd3d/libs/vkd3d/vkd3d_private.h | 70 +-
29 files changed, 9441 insertions(+), 5032 deletions(-)
diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h
index ec1dd70c9b2..fd62730f948 100644
--- a/libs/vkd3d/include/private/vkd3d_common.h
+++ b/libs/vkd3d/include/private/vkd3d_common.h
@@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v)
{
#ifdef _MSC_VER
return __popcnt(v);
-#elif defined(__MINGW32__)
+#elif defined(HAVE_BUILTIN_POPCOUNT)
return __builtin_popcount(v);
#else
v -= (v >> 1) & 0x55555555;
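
For reference, the fallback branch continues past the line above with the classic SWAR reduction. A self-contained sketch (the helper name and test values are illustrative; only the first step appears in the hunk above):

#include <assert.h>

/* Standard SWAR population count, starting from the step shown above. */
static unsigned int swar_popcount(unsigned int v)
{
    v -= (v >> 1) & 0x55555555;                     /* 2-bit partial sums */
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333); /* 4-bit sums */
    v = (v + (v >> 4)) & 0x0f0f0f0f;                /* 8-bit sums */
    return (v * 0x01010101) >> 24;                  /* add the four byte sums */
}

int main(void)
{
    assert(swar_popcount(0x00000000u) == 0);
    assert(swar_popcount(0xf0f0f0f0u) == 16);
    assert(swar_popcount(0xffffffffu) == 32);
    return 0;
}
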
diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h
index b18fd14f4c3..2376693421c 100644
--- a/libs/vkd3d/include/vkd3d.h
+++ b/libs/vkd3d/include/vkd3d.h
@@ -411,9 +411,13 @@ VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue);
* the Vulkan driver as being submitted before other work submitted
* through the Direct3D 12 API. If this is not desired, it is
* recommended to synchronize work submission using an ID3D12Fence
- * object, by submitting to the queue a signal operation after all the
- * Direct3D 12 work is submitted and waiting for it before calling
- * vkd3d_acquire_vk_queue().
+ * object:
+ * 1. submit work through the Direct3D 12 API;
+ * 2. call vkd3d_queue_signal_on_cpu();
+ * 3. wait for the fence to be signalled;
+ * 4. call vkd3d_acquire_vk_queue(); it is guaranteed that all work submitted
+ * at point 1 has already been submitted to Vulkan (though not necessarily
+ * executed).
*
* \since 1.0
*/
@@ -466,6 +470,21 @@ VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void
*/
VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback);
+/**
+ * Signal a fence on the CPU once all the currently outstanding queue work is
+ * submitted to Vulkan.
+ *
+ * The fence will be signalled on the CPU (as if ID3D12Fence_Signal() was
+ * called) once all the work submitted through the Direct3D 12 API before
+ * vkd3d_queue_signal_on_cpu() is called has left the internal queue and has
+ * been submitted to the underlying Vulkan queue. Read the documentation for
+ * vkd3d_acquire_vk_queue() for more details.
+ *
+ * \since 1.15
+ */
+VKD3D_API HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *queue,
+ ID3D12Fence *fence, uint64_t value);
+
#endif /* VKD3D_NO_PROTOTYPES */
/*
@@ -512,6 +531,10 @@ typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const
/** Type of vkd3d_set_log_callback(). \since 1.4 */
typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback);
+/** Type of vkd3d_queue_signal_on_cpu(). \since 1.15 */
+typedef HRESULT (*PFN_vkd3d_queue_signal_on_cpu)(ID3D12CommandQueue *queue,
+ ID3D12Fence *fence, uint64_t value);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
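
Taken together with vkd3d_acquire_vk_queue(), the four-step recipe documented in this header amounts to the sketch below. The helper is hypothetical, error handling is omitted, the event helpers are the ones from vkd3d-utils, and a device whose fence events are wired up (e.g. one created through vkd3d-utils) is assumed:

#define COBJMACROS
#include <vkd3d.h>
#include <vkd3d_utils.h>

static void submit_vulkan_after_d3d12(ID3D12Device *device,
        ID3D12CommandQueue *queue, ID3D12CommandList *list)
{
    ID3D12Fence *fence;
    VkQueue vk_queue;
    HANDLE event;

    ID3D12Device_CreateFence(device, 0, D3D12_FENCE_FLAG_NONE,
            &IID_ID3D12Fence, (void **)&fence);
    event = vkd3d_create_event();

    /* 1. Submit work through the Direct3D 12 API. */
    ID3D12CommandQueue_ExecuteCommandLists(queue, 1, &list);
    /* 2. Ask for a CPU-side signal once that work reaches Vulkan. */
    vkd3d_queue_signal_on_cpu(queue, fence, 1);
    /* 3. Wait for the fence to be signalled. */
    ID3D12Fence_SetEventOnCompletion(fence, 1, event);
    vkd3d_wait_event(event, VKD3D_INFINITE);
    /* 4. Everything from step 1 is now in the Vulkan queue. */
    vk_queue = vkd3d_acquire_vk_queue(queue);
    /* ... vkQueueSubmit() native work to vk_queue here ... */
    (void)vk_queue;
    vkd3d_release_vk_queue(queue);

    vkd3d_destroy_event(event);
    ID3D12Fence_Release(fence);
}
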
diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h
index f95caa2f825..af55d63a5c8 100644
--- a/libs/vkd3d/include/vkd3d_shader.h
+++ b/libs/vkd3d/include/vkd3d_shader.h
@@ -112,6 +112,11 @@ enum vkd3d_shader_structure_type
* \since 1.13
*/
VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO,
+ /**
+ * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure.
+ * \since 1.15
+ */
+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE),
};
@@ -471,6 +476,109 @@ enum vkd3d_shader_binding_flag
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG),
};
+/**
+ * The factor used to interpolate the fragment output colour with fog.
+ *
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for the specification of
+ * how the interpolation factor defined here is used.
+ *
+ * The following variables may be used to determine the interpolation factor:
+ *
+ * c = The fog coordinate value output from the vertex shader. This is an
+ * inter-stage varying with the semantic name "FOG" and semantic index 0.
+ * It may be modified by VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE.
+ * E = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_END.
+ * k = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE.
+ *
+ * \since 1.15
+ */
+enum vkd3d_shader_fog_fragment_mode
+{
+ /**
+ * No fog interpolation is applied;
+ * the output colour is passed through unmodified.
+ * Equivalently, the fog interpolation factor is 1.
+ */
+ VKD3D_SHADER_FOG_FRAGMENT_NONE = 0x0,
+ /**
+ * The fog interpolation factor is 2^-(k * c).
+ *
+ * In order to implement traditional exponential fog, as present in
+ * Direct3D and OpenGL, i.e.
+ *
+ * e^-(density * c)
+ *
+ * set
+ *
+ * k = density * log₂(e)
+ */
+ VKD3D_SHADER_FOG_FRAGMENT_EXP = 0x1,
+ /**
+ * The fog interpolation factor is 2^-((k * c)²).
+ *
+ * In order to implement traditional square-exponential fog, as present in
+ * Direct3D and OpenGL, i.e.
+ *
+ * e^-((density * c)²)
+ *
+ * set
+ *
+ * k = density * √log₂(e)
+ */
+ VKD3D_SHADER_FOG_FRAGMENT_EXP2 = 0x2,
+ /**
+ * The fog interpolation factor is (E - c) * k.
+ *
+ * In order to implement traditional linear fog, as present in Direct3D and
+ * OpenGL, i.e.
+ *
+ * (end - c) / (end - start)
+ *
+ * set
+ *
+ * E = end
+ * k = 1 / (end - start)
+ */
+ VKD3D_SHADER_FOG_FRAGMENT_LINEAR = 0x3,
+};
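
Written out in C, the factors and conversions above are (a reference sketch; function names are illustrative):

#include <math.h>

/* Reference evaluation of the fog factors described above; c is the fog
 * coordinate, k the FOG_SCALE value and E the FOG_END value. */
static float fog_factor_exp(float c, float k)             { return exp2f(-k * c); }
static float fog_factor_exp2(float c, float k)            { return exp2f(-(k * c) * (k * c)); }
static float fog_factor_linear(float c, float k, float E) { return (E - c) * k; }

/* Conversions from traditional Direct3D/OpenGL fog state:
 *   exp:    k = density * log2(e)       => 2^-(k c)     == e^-(density c)
 *   exp2:   k = density * sqrt(log2(e)) => 2^-((k c)^2) == e^-((density c)^2)
 *   linear: E = end, k = 1 / (end - start) */
static float fog_scale_exp(float density)  { return density * (float)M_LOG2E; }
static float fog_scale_exp2(float density) { return density * sqrtf((float)M_LOG2E); }
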
+
+/**
+ * The source of the fog varying output by a pre-rasterization shader.
+ * The fog varying is defined as the output varying with the semantic name "FOG"
+ * and semantic index 0.
+ *
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE for further documentation of this
+ * parameter.
+ *
+ * \since 1.15
+ */
+enum vkd3d_shader_fog_source
+{
+ /**
+ * The source shader is not modified. That is, the fog varying in the target
+ * shader is the original fog varying if and only if present.
+ */
+ VKD3D_SHADER_FOG_SOURCE_FOG = 0x0,
+ /**
+ * If the source shader has a fog varying, it is not modified.
+ * Otherwise, if the source shader outputs a varying with semantic name
+ * "COLOR" and semantic index 1 whose index includes a W component,
+ * said W component is output as fog varying.
+ * Otherwise, no fog varying is output.
+ */
+ VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W = 0x1,
+ /**
+ * The fog source is the Z component of the position output by the vertex
+ * shader.
+ */
+ VKD3D_SHADER_FOG_SOURCE_Z = 0x2,
+ /**
+ * The fog source is the W component of the position output by the vertex
+ * shader.
+ */
+ VKD3D_SHADER_FOG_SOURCE_W = 0x3,
+};
+
/**
* The manner in which a parameter value is provided to the shader, used in
* struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1.
@@ -734,6 +842,97 @@ enum vkd3d_shader_parameter_name
* \since 1.14
*/
VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE,
+ /**
+ * Fog mode used in fragment shaders.
+ *
+ * The value specified by this parameter must be a member of
+ * enum vkd3d_shader_fog_fragment_mode.
+ *
+ * If not VKD3D_SHADER_FOG_FRAGMENT_NONE, the pixel shader colour output at
+ * location 0 is linearly interpolated with the fog colour defined by
+ * VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR. The interpolation factor is
+ * defined according to the enumerant selected by this parameter.
+ * The interpolated value is then output instead of the original value at
+ * location 0.
+ *
+ * An interpolation factor of 0 specifies to use the fog colour; a factor of
+ * 1 specifies to use the original colour output. The interpolation factor
+ * is clamped to the [0, 1] range before interpolating.
+ *
+ * The default value is VKD3D_SHADER_FOG_FRAGMENT_NONE.
+ *
+ * The data type for this parameter must be
+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32.
+ *
+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this
+ * version of vkd3d-shader.
+ *
+ * \since 1.15
+ */
+ VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE,
+ /**
+ * Fog colour.
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of
+ * fog.
+ *
+ * The data type for this parameter must be
+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4.
+ *
+ * The default value is transparent black, i.e. the vector {0, 0, 0, 0}.
+ *
+ * \since 1.15
+ */
+ VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR,
+ /**
+ * End coordinate for linear fog.
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of
+ * fog.
+ *
+ * The data type for this parameter must be
+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32.
+ *
+ * The default value is 1.0.
+ *
+ * \since 1.15
+ */
+ VKD3D_SHADER_PARAMETER_NAME_FOG_END,
+ /**
+ * Scale value for fog.
+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of
+ * fog.
+ *
+ * The data type for this parameter must be
+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32.
+ *
+ * The default value is 1.0.
+ *
+ * \since 1.15
+ */
+ VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE,
+ /**
+ * Fog source. The value specified by this parameter must be a member of
+ * enum vkd3d_shader_fog_source.
+ *
+ * This parameter replaces or supplements the fog varying output by a
+ * pre-rasterization shader. The fog varying is defined as the output
+ * varying with the semantic name "FOG" and semantic index 0.
+ *
+ * Together with other fog parameters, this parameter can be used to
+ * implement fixed-function fog, as present in Direct3D versions up to 9,
+ * if the target environment does not support fog as part of its own
+ * fixed-function API (as is the case for Vulkan and core OpenGL).
+ *
+ * The default value is VKD3D_SHADER_FOG_SOURCE_FOG.
+ *
+ * The data type for this parameter must be
+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32.
+ *
+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this
+ * version of vkd3d-shader.
+ *
+ * \since 1.15
+ */
+ VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME),
};
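
As a usage sketch, the linear fog mode described above can be configured through the existing parameter-info mechanism. The helper below is hypothetical, the struct and field names are the ones from the 1.13 parameter API, and FOG_COLOUR is left at its documented transparent-black default:

#include <string.h>
#include <vkd3d_shader.h>

static void set_linear_fog_parameters(struct vkd3d_shader_parameter_info *info,
        struct vkd3d_shader_parameter1 params[3], float fog_start, float fog_end)
{
    memset(params, 0, 3 * sizeof(*params));

    params[0].name = VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE;
    params[0].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
    params[0].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32;
    params[0].u.immediate_constant.u.u32 = VKD3D_SHADER_FOG_FRAGMENT_LINEAR;

    params[1].name = VKD3D_SHADER_PARAMETER_NAME_FOG_END;
    params[1].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
    params[1].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32;
    params[1].u.immediate_constant.u.f32 = fog_end;

    /* (E - c) * k with k = 1 / (end - start), per the linear mode above. */
    params[2].name = VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE;
    params[2].type = VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT;
    params[2].data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32;
    params[2].u.immediate_constant.u.f32 = 1.0f / (fog_end - fog_start);

    info->type = VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO;
    info->next = NULL;
    info->parameters = params;
    info->parameter_count = 3;
}
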
@@ -2040,6 +2239,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info
unsigned int combined_sampler_count;
};
+/**
+ * A chained structure describing the tessellation information in a hull shader.
+ *
+ * This structure extends vkd3d_shader_compile_info.
+ *
+ * \since 1.15
+ */
+struct vkd3d_shader_scan_hull_shader_tessellation_info
+{
+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */
+ enum vkd3d_shader_structure_type type;
+ /** Optional pointer to a structure containing further parameters. */
+ const void *next;
+
+ /** The tessellation output primitive. */
+ enum vkd3d_shader_tessellator_output_primitive output_primitive;
+ /** The tessellation partitioning mode. */
+ enum vkd3d_shader_tessellator_partitioning partitioning;
+};
+
/**
* Data type of a shader varying, returned as part of struct
* vkd3d_shader_signature_element.
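
A scan that retrieves the new tessellation values might look like the following sketch (the helper is hypothetical; the chaining follows the same pattern as the other scan structures):

#include <stdbool.h>
#include <stddef.h>
#include <vkd3d_shader.h>

static bool scan_hull_shader_tessellation(const void *dxbc, size_t size,
        enum vkd3d_shader_tessellator_output_primitive *output_primitive,
        enum vkd3d_shader_tessellator_partitioning *partitioning)
{
    struct vkd3d_shader_scan_hull_shader_tessellation_info tess_info =
    {
        .type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO,
    };
    struct vkd3d_shader_compile_info info =
    {
        .type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO,
        .next = &tess_info,
        .source = {dxbc, size},
        .source_type = VKD3D_SHADER_SOURCE_DXBC_TPF,
        .target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY,
    };

    if (vkd3d_shader_scan(&info, NULL) < 0)
        return false;

    *output_primitive = tess_info.output_primitive;
    *partitioning = tess_info.partitioning;
    return true;
}
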
diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c
index f60ef7db769..c2c6ad67804 100644
--- a/libs/vkd3d/libs/vkd3d-common/blob.c
+++ b/libs/vkd3d/libs/vkd3d-common/blob.c
@@ -20,6 +20,7 @@
#define WIDL_C_INLINE_WRAPPERS
#endif
#define COBJMACROS
+
#define CONST_VTABLE
#include "vkd3d.h"
#include "vkd3d_blob.h"
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
index 7c5444f63a3..0639da83aa6 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
@@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] =
[VKD3DSIH_BFREV ] = "bfrev",
[VKD3DSIH_BRANCH ] = "branch",
[VKD3DSIH_BREAK ] = "break",
- [VKD3DSIH_BREAKC ] = "breakc",
+ [VKD3DSIH_BREAKC ] = "break",
[VKD3DSIH_BREAKP ] = "breakp",
[VKD3DSIH_BUFINFO ] = "bufinfo",
[VKD3DSIH_CALL ] = "call",
@@ -183,7 +183,7 @@ static const char * const shader_opcode_names[] =
[VKD3DSIH_IDIV ] = "idiv",
[VKD3DSIH_IEQ ] = "ieq",
[VKD3DSIH_IF ] = "if",
- [VKD3DSIH_IFC ] = "ifc",
+ [VKD3DSIH_IFC ] = "if",
[VKD3DSIH_IGE ] = "ige",
[VKD3DSIH_ILT ] = "ilt",
[VKD3DSIH_IMAD ] = "imad",
@@ -354,6 +354,64 @@ static const char * const shader_opcode_names[] =
[VKD3DSIH_XOR ] = "xor",
};
+static const char * const shader_register_names[] =
+{
+ [VKD3DSPR_ADDR ] = "a",
+ [VKD3DSPR_ATTROUT ] = "oD",
+ [VKD3DSPR_COLOROUT ] = "oC",
+ [VKD3DSPR_COMBINED_SAMPLER ] = "s",
+ [VKD3DSPR_CONST ] = "c",
+ [VKD3DSPR_CONSTBOOL ] = "b",
+ [VKD3DSPR_CONSTBUFFER ] = "cb",
+ [VKD3DSPR_CONSTINT ] = "i",
+ [VKD3DSPR_COVERAGE ] = "vCoverage",
+ [VKD3DSPR_DEPTHOUT ] = "oDepth",
+ [VKD3DSPR_DEPTHOUTGE ] = "oDepthGE",
+ [VKD3DSPR_DEPTHOUTLE ] = "oDepthLE",
+ [VKD3DSPR_FORKINSTID ] = "vForkInstanceId",
+ [VKD3DSPR_FUNCTIONBODY ] = "fb",
+ [VKD3DSPR_FUNCTIONPOINTER ] = "fp",
+ [VKD3DSPR_GROUPSHAREDMEM ] = "g",
+ [VKD3DSPR_GSINSTID ] = "vGSInstanceID",
+ [VKD3DSPR_IDXTEMP ] = "x",
+ [VKD3DSPR_IMMCONST ] = "l",
+ [VKD3DSPR_IMMCONST64 ] = "d",
+ [VKD3DSPR_IMMCONSTBUFFER ] = "icb",
+ [VKD3DSPR_INCONTROLPOINT ] = "vicp",
+ [VKD3DSPR_INPUT ] = "v",
+ [VKD3DSPR_JOININSTID ] = "vJoinInstanceId",
+ [VKD3DSPR_LABEL ] = "l",
+ [VKD3DSPR_LOCALTHREADID ] = "vThreadIDInGroup",
+ [VKD3DSPR_LOCALTHREADINDEX ] = "vThreadIDInGroupFlattened",
+ [VKD3DSPR_LOOP ] = "aL",
+ [VKD3DSPR_NULL ] = "null",
+ [VKD3DSPR_OUTCONTROLPOINT ] = "vocp",
+ [VKD3DSPR_OUTPOINTID ] = "vOutputControlPointID",
+ [VKD3DSPR_OUTPUT ] = "o",
+ [VKD3DSPR_OUTSTENCILREF ] = "oStencilRef",
+ [VKD3DSPR_PARAMETER ] = "parameter",
+ [VKD3DSPR_PATCHCONST ] = "vpc",
+ [VKD3DSPR_POINT_COORD ] = "vPointCoord",
+ [VKD3DSPR_PREDICATE ] = "p",
+ [VKD3DSPR_PRIMID ] = "primID",
+ [VKD3DSPR_RASTERIZER ] = "rasterizer",
+ [VKD3DSPR_RESOURCE ] = "t",
+ [VKD3DSPR_SAMPLEMASK ] = "oMask",
+ [VKD3DSPR_SAMPLER ] = "s",
+ [VKD3DSPR_SSA ] = "sr",
+ [VKD3DSPR_STREAM ] = "m",
+ [VKD3DSPR_TEMP ] = "r",
+ [VKD3DSPR_TESSCOORD ] = "vDomainLocation",
+ [VKD3DSPR_TEXCRDOUT ] = "oT",
+ [VKD3DSPR_TEXTURE ] = "t",
+ [VKD3DSPR_THREADGROUPID ] = "vThreadGroupID",
+ [VKD3DSPR_THREADID ] = "vThreadID",
+ [VKD3DSPR_UAV ] = "u",
+ [VKD3DSPR_UNDEF ] = "undef",
+ [VKD3DSPR_WAVELANECOUNT ] = "vWaveLaneCount",
+ [VKD3DSPR_WAVELANEINDEX ] = "vWaveLaneIndex",
+};
+
struct vkd3d_d3d_asm_colours
{
const char *reset;
@@ -377,22 +435,6 @@ struct vkd3d_d3d_asm_compiler
const struct vkd3d_shader_instruction *current;
};
-/* Convert floating point offset relative to a register file to an absolute
- * offset for float constants. */
-static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx)
-{
- switch (register_type)
- {
- case VKD3DSPR_CONST: return register_idx;
- case VKD3DSPR_CONST2: return 2048 + register_idx;
- case VKD3DSPR_CONST3: return 4096 + register_idx;
- case VKD3DSPR_CONST4: return 6144 + register_idx;
- default:
- FIXME("Unsupported register type: %u.\n", register_type);
- return register_idx;
- }
-}
-
static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags)
{
unsigned int i;
@@ -815,7 +857,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler,
usage = "tessfactor";
break;
case VKD3D_DECL_USAGE_POSITIONT:
- usage = "positionT";
+ usage = "positiont";
indexed = true;
break;
case VKD3D_DECL_USAGE_FOG:
@@ -966,82 +1008,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg);
switch (reg->type)
{
- case VKD3DSPR_TEMP:
- vkd3d_string_buffer_printf(buffer, "r");
- break;
-
- case VKD3DSPR_INPUT:
- vkd3d_string_buffer_printf(buffer, "v");
- break;
-
- case VKD3DSPR_CONST:
- case VKD3DSPR_CONST2:
- case VKD3DSPR_CONST3:
- case VKD3DSPR_CONST4:
- vkd3d_string_buffer_printf(buffer, "c");
- offset = shader_get_float_offset(reg->type, offset);
- break;
-
- case VKD3DSPR_TEXTURE: /* vs: case VKD3DSPR_ADDR */
- vkd3d_string_buffer_printf(buffer, "%c",
- compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? 't' : 'a');
- break;
-
case VKD3DSPR_RASTOUT:
vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]);
break;
- case VKD3DSPR_COLOROUT:
- vkd3d_string_buffer_printf(buffer, "oC");
- break;
-
- case VKD3DSPR_DEPTHOUT:
- vkd3d_string_buffer_printf(buffer, "oDepth");
- break;
-
- case VKD3DSPR_DEPTHOUTGE:
- vkd3d_string_buffer_printf(buffer, "oDepthGE");
- break;
-
- case VKD3DSPR_DEPTHOUTLE:
- vkd3d_string_buffer_printf(buffer, "oDepthLE");
- break;
-
- case VKD3DSPR_ATTROUT:
- vkd3d_string_buffer_printf(buffer, "oD");
- break;
-
- case VKD3DSPR_TEXCRDOUT:
- /* Vertex shaders >= 3.0 use general purpose output registers
- * (VKD3DSPR_OUTPUT), which can include an address token. */
- if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0))
- vkd3d_string_buffer_printf(buffer, "o");
- else
- vkd3d_string_buffer_printf(buffer, "oT");
- break;
-
- case VKD3DSPR_CONSTINT:
- vkd3d_string_buffer_printf(buffer, "i");
- break;
-
- case VKD3DSPR_CONSTBOOL:
- vkd3d_string_buffer_printf(buffer, "b");
- break;
-
- case VKD3DSPR_LABEL:
- vkd3d_string_buffer_printf(buffer, "l");
- break;
-
- case VKD3DSPR_LOOP:
- vkd3d_string_buffer_printf(buffer, "aL");
- break;
-
- case VKD3DSPR_COMBINED_SAMPLER:
- case VKD3DSPR_SAMPLER:
- vkd3d_string_buffer_printf(buffer, "s");
- is_descriptor = true;
- break;
-
case VKD3DSPR_MISCTYPE:
if (offset > 1)
vkd3d_string_buffer_printf(buffer, "%s<unhandled misctype %#x>%s",
@@ -1050,156 +1020,20 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]);
break;
- case VKD3DSPR_PREDICATE:
- vkd3d_string_buffer_printf(buffer, "p");
- break;
-
- case VKD3DSPR_IMMCONST:
- vkd3d_string_buffer_printf(buffer, "l");
- break;
-
- case VKD3DSPR_IMMCONST64:
- vkd3d_string_buffer_printf(buffer, "d");
- break;
-
+ case VKD3DSPR_COMBINED_SAMPLER:
+ case VKD3DSPR_SAMPLER:
case VKD3DSPR_CONSTBUFFER:
- vkd3d_string_buffer_printf(buffer, "cb");
- is_descriptor = true;
- break;
-
- case VKD3DSPR_IMMCONSTBUFFER:
- vkd3d_string_buffer_printf(buffer, "icb");
- break;
-
- case VKD3DSPR_PRIMID:
- vkd3d_string_buffer_printf(buffer, "primID");
- break;
-
- case VKD3DSPR_NULL:
- vkd3d_string_buffer_printf(buffer, "null");
- break;
-
- case VKD3DSPR_RASTERIZER:
- vkd3d_string_buffer_printf(buffer, "rasterizer");
- break;
-
case VKD3DSPR_RESOURCE:
- vkd3d_string_buffer_printf(buffer, "t");
- is_descriptor = true;
- break;
-
case VKD3DSPR_UAV:
- vkd3d_string_buffer_printf(buffer, "u");
is_descriptor = true;
- break;
-
- case VKD3DSPR_OUTPOINTID:
- vkd3d_string_buffer_printf(buffer, "vOutputControlPointID");
- break;
-
- case VKD3DSPR_FORKINSTID:
- vkd3d_string_buffer_printf(buffer, "vForkInstanceId");
- break;
-
- case VKD3DSPR_JOININSTID:
- vkd3d_string_buffer_printf(buffer, "vJoinInstanceId");
- break;
-
- case VKD3DSPR_INCONTROLPOINT:
- vkd3d_string_buffer_printf(buffer, "vicp");
- break;
-
- case VKD3DSPR_OUTCONTROLPOINT:
- vkd3d_string_buffer_printf(buffer, "vocp");
- break;
-
- case VKD3DSPR_PATCHCONST:
- vkd3d_string_buffer_printf(buffer, "vpc");
- break;
-
- case VKD3DSPR_TESSCOORD:
- vkd3d_string_buffer_printf(buffer, "vDomainLocation");
- break;
-
- case VKD3DSPR_GROUPSHAREDMEM:
- vkd3d_string_buffer_printf(buffer, "g");
- break;
-
- case VKD3DSPR_THREADID:
- vkd3d_string_buffer_printf(buffer, "vThreadID");
- break;
-
- case VKD3DSPR_THREADGROUPID:
- vkd3d_string_buffer_printf(buffer, "vThreadGroupID");
- break;
-
- case VKD3DSPR_LOCALTHREADID:
- vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup");
- break;
-
- case VKD3DSPR_LOCALTHREADINDEX:
- vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened");
- break;
-
- case VKD3DSPR_IDXTEMP:
- vkd3d_string_buffer_printf(buffer, "x");
- break;
-
- case VKD3DSPR_STREAM:
- vkd3d_string_buffer_printf(buffer, "m");
- break;
-
- case VKD3DSPR_FUNCTIONBODY:
- vkd3d_string_buffer_printf(buffer, "fb");
- break;
-
- case VKD3DSPR_FUNCTIONPOINTER:
- vkd3d_string_buffer_printf(buffer, "fp");
- break;
-
- case VKD3DSPR_COVERAGE:
- vkd3d_string_buffer_printf(buffer, "vCoverage");
- break;
-
- case VKD3DSPR_SAMPLEMASK:
- vkd3d_string_buffer_printf(buffer, "oMask");
- break;
-
- case VKD3DSPR_GSINSTID:
- vkd3d_string_buffer_printf(buffer, "vGSInstanceID");
- break;
-
- case VKD3DSPR_OUTSTENCILREF:
- vkd3d_string_buffer_printf(buffer, "oStencilRef");
- break;
-
- case VKD3DSPR_UNDEF:
- vkd3d_string_buffer_printf(buffer, "undef");
- break;
-
- case VKD3DSPR_SSA:
- vkd3d_string_buffer_printf(buffer, "sr");
- break;
-
- case VKD3DSPR_WAVELANECOUNT:
- vkd3d_string_buffer_printf(buffer, "vWaveLaneCount");
- break;
-
- case VKD3DSPR_WAVELANEINDEX:
- vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex");
- break;
-
- case VKD3DSPR_PARAMETER:
- vkd3d_string_buffer_printf(buffer, "parameter");
- break;
-
- case VKD3DSPR_POINT_COORD:
- vkd3d_string_buffer_printf(buffer, "vPointCoord");
- break;
+ /* fall through */
default:
- vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s",
- compiler->colours.error, reg->type, compiler->colours.reset);
+ if (reg->type < ARRAY_SIZE(shader_register_names) && shader_register_names[reg->type])
+ vkd3d_string_buffer_printf(buffer, "%s", shader_register_names[reg->type]);
+ else
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s",
+ compiler->colours.error, reg->type, compiler->colours.reset);
break;
}
@@ -1346,8 +1180,8 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
bool is_sm_5_1 = vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1);
if (reg->idx[0].rel_addr || reg->type == VKD3DSPR_IMMCONSTBUFFER
- || reg->type == VKD3DSPR_INCONTROLPOINT || (reg->type == VKD3DSPR_INPUT
- && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY
+ || reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT
+ || (reg->type == VKD3DSPR_INPUT && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY
|| compiler->shader_version.type == VKD3D_SHADER_TYPE_HULL)))
{
vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset);
@@ -2132,8 +1966,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
case VKD3DSIH_DEF:
vkd3d_string_buffer_printf(buffer, " %sc%u%s", compiler->colours.reg,
- shader_get_float_offset(ins->dst[0].reg.type, ins->dst[0].reg.idx[0].offset),
- compiler->colours.reset);
+ ins->dst[0].reg.idx[0].offset, compiler->colours.reset);
shader_print_float_literal(compiler, " = ", ins->src[0].reg.u.immconst_f32[0], "");
shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[1], "");
shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[2], "");
@@ -2547,6 +2380,33 @@ static void trace_signature(const struct shader_signature *signature, const char
vkd3d_string_buffer_cleanup(&buffer);
}
+static void trace_io_declarations(const struct vsir_program *program)
+{
+ struct vkd3d_string_buffer buffer;
+ bool empty = true;
+ unsigned int i;
+
+ vkd3d_string_buffer_init(&buffer);
+
+ vkd3d_string_buffer_printf(&buffer, "Input/output declarations:");
+
+ for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i)
+ {
+ if (bitmap_is_set(program->io_dcls, i))
+ {
+ empty = false;
+ vkd3d_string_buffer_printf(&buffer, " %u", i);
+ }
+ }
+
+ if (empty)
+ vkd3d_string_buffer_printf(&buffer, " empty");
+
+ TRACE("%s\n", buffer.buffer);
+
+ vkd3d_string_buffer_cleanup(&buffer);
+}
+
void vsir_program_trace(const struct vsir_program *program)
{
const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES;
@@ -2556,6 +2416,7 @@ void vsir_program_trace(const struct vsir_program *program)
trace_signature(&program->input_signature, "Input");
trace_signature(&program->output_signature, "Output");
trace_signature(&program->patch_constant_signature, "Patch-constant");
+ trace_io_declarations(program);
if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK)
return;
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index 9e2eacbcfa6..58e35cf22e8 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -8,7 +8,7 @@
* Copyright 2006 Ivan Gyurdiev
* Copyright 2007-2008 Stefan Dösinger for CodeWeavers
* Copyright 2009, 2021 Henri Verbeet for CodeWeavers
- * Copyright 2019-2020 Zebediah Figura for CodeWeavers
+ * Copyright 2019-2020, 2023-2024 Elizabeth Figura for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -25,7 +25,7 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
-#include "hlsl.h"
+#include "vkd3d_shader_private.h"
#define VKD3D_SM1_VS 0xfffeu
#define VKD3D_SM1_PS 0xffffu
@@ -89,6 +89,32 @@
#define VKD3D_SM1_VERSION_MAJOR(version) (((version) >> 8u) & 0xffu)
#define VKD3D_SM1_VERSION_MINOR(version) (((version) >> 0u) & 0xffu)
+enum vkd3d_sm1_register_type
+{
+ VKD3D_SM1_REG_TEMP = 0x00,
+ VKD3D_SM1_REG_INPUT = 0x01,
+ VKD3D_SM1_REG_CONST = 0x02,
+ VKD3D_SM1_REG_ADDR = 0x03,
+ VKD3D_SM1_REG_TEXTURE = 0x03,
+ VKD3D_SM1_REG_RASTOUT = 0x04,
+ VKD3D_SM1_REG_ATTROUT = 0x05,
+ VKD3D_SM1_REG_TEXCRDOUT = 0x06,
+ VKD3D_SM1_REG_OUTPUT = 0x06,
+ VKD3D_SM1_REG_CONSTINT = 0x07,
+ VKD3D_SM1_REG_COLOROUT = 0x08,
+ VKD3D_SM1_REG_DEPTHOUT = 0x09,
+ VKD3D_SM1_REG_SAMPLER = 0x0a,
+ VKD3D_SM1_REG_CONST2 = 0x0b,
+ VKD3D_SM1_REG_CONST3 = 0x0c,
+ VKD3D_SM1_REG_CONST4 = 0x0d,
+ VKD3D_SM1_REG_CONSTBOOL = 0x0e,
+ VKD3D_SM1_REG_LOOP = 0x0f,
+ VKD3D_SM1_REG_TEMPFLOAT16 = 0x10,
+ VKD3D_SM1_REG_MISCTYPE = 0x11,
+ VKD3D_SM1_REG_LABEL = 0x12,
+ VKD3D_SM1_REG_PREDICATE = 0x13,
+};
+
enum vkd3d_sm1_address_mode_type
{
VKD3D_SM1_ADDRESS_MODE_ABSOLUTE = 0x0,
@@ -235,7 +261,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] =
/* Arithmetic */
{VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP},
{VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV},
- {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}},
+ {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}},
{VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD},
{VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB},
{VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD},
@@ -248,22 +274,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] =
{VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX},
{VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT},
{VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE},
- {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS},
+ {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}},
{VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP},
{VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG},
{VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP},
{VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP},
{VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT},
{VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST},
- {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP},
+ {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP, {2, 0}},
{VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC},
- {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW},
- {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS},
- {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}},
- {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,},
- {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}},
- {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}},
+ {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}},
+ {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}},
+ {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, {2, 1}},
+ {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}},
+ {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}},
+ {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}},
+ {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}},
/* Matrix */
{VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4},
{VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3},
@@ -274,27 +300,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] =
{VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL},
/* Constant definitions */
{VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF},
- {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB},
- {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI},
+ {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}},
+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 0}},
/* Control flow */
- {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP},
- {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}},
-
- {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP},
- {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}},
+ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}},
+ {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}},
+ {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}},
+ {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}},
+ {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}},
+ {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}},
+ {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}},
+ {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}},
+ {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}},
+ {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}},
+ {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}},
+ {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}},
+ {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}},
+ {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}},
+ {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}},
+
+ {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}},
+ {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}},
{0, 0, 0, VKD3DSIH_INVALID},
};
@@ -307,92 +333,115 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] =
{VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB},
{VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD},
{VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL},
- {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP},
- {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ},
+ {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP, {2, 0}},
+ {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ, {2, 0}},
{VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3},
- {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4},
- {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN},
- {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX},
- {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT},
- {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE},
- {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS},
- {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP},
- {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG},
- {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP},
- {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP},
- {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST},
+ {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4, {1, 2}},
+ {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN, {2, 0}},
+ {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX, {2, 0}},
+ {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}},
+ {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP, {2, 0}},
+ {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG, {2, 0}},
{VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP},
- {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC},
- {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}},
- {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}},
- {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW},
- {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS},
- {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM},
- {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}},
- {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}},
+ {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC, {2, 0}},
+ {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, {1, 4}},
+ {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}},
+ {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}},
+ {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}},
+ {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}},
+ {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}},
+ {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}},
+ {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}},
/* Matrix */
- {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4},
- {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3},
- {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4},
- {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3},
- {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2},
+ {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4, {2, 0}},
+ {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3, {2, 0}},
+ {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4, {2, 0}},
+ {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3, {2, 0}},
+ {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2, {2, 0}},
/* Declarations */
- {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL},
+ {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL, {2, 0}},
/* Constant definitions */
{VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF},
- {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB},
- {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI},
+ {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}},
+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 1}},
/* Control flow */
- {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP},
- {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}},
+ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}},
+ {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}},
+ {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}},
+ {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}},
+ {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}},
+ {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}},
+ {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}},
+ {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}},
+ {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}},
+ {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}},
+ {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}},
+ {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}},
+ {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}},
+ {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}},
+ {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}},
/* Texture */
- {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}},
- {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}},
- {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}},
- {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}},
- {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}},
- {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}},
- {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}},
- {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}},
- {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}},
- {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}},
- {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}},
- {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP},
- {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}},
- {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE},
+ {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1, 4}, {1, 4}},
+ {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1, 0}},
+ {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, {1, 4}},
+ {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}},
+ {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, {0, 0}},
+ {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, {1, 3}},
+ {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, {1, 3}},
+ {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, {1, 3}},
+ {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, {1, 3}},
+ {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, {1, 4}},
+ {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, {1, 4}},
+ {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}},
+ {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}},
+ {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}},
+ {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}},
+ {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}},
+ {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE, {1, 4}, {1, 4}},
{0, 0, 0, VKD3DSIH_INVALID},
};
+static const struct
+{
+ enum vkd3d_sm1_register_type d3dbc_type;
+ enum vkd3d_shader_register_type vsir_type;
+}
+register_types[] =
+{
+ {VKD3D_SM1_REG_TEMP, VKD3DSPR_TEMP},
+ {VKD3D_SM1_REG_INPUT, VKD3DSPR_INPUT},
+ {VKD3D_SM1_REG_CONST, VKD3DSPR_CONST},
+ {VKD3D_SM1_REG_ADDR, VKD3DSPR_ADDR},
+ {VKD3D_SM1_REG_TEXTURE, VKD3DSPR_TEXTURE},
+ {VKD3D_SM1_REG_RASTOUT, VKD3DSPR_RASTOUT},
+ {VKD3D_SM1_REG_ATTROUT, VKD3DSPR_ATTROUT},
+ {VKD3D_SM1_REG_OUTPUT, VKD3DSPR_OUTPUT},
+ {VKD3D_SM1_REG_TEXCRDOUT, VKD3DSPR_TEXCRDOUT},
+ {VKD3D_SM1_REG_CONSTINT, VKD3DSPR_CONSTINT},
+ {VKD3D_SM1_REG_COLOROUT, VKD3DSPR_COLOROUT},
+ {VKD3D_SM1_REG_DEPTHOUT, VKD3DSPR_DEPTHOUT},
+ {VKD3D_SM1_REG_SAMPLER, VKD3DSPR_COMBINED_SAMPLER},
+ {VKD3D_SM1_REG_CONSTBOOL, VKD3DSPR_CONSTBOOL},
+ {VKD3D_SM1_REG_LOOP, VKD3DSPR_LOOP},
+ {VKD3D_SM1_REG_TEMPFLOAT16, VKD3DSPR_TEMPFLOAT16},
+ {VKD3D_SM1_REG_MISCTYPE, VKD3DSPR_MISCTYPE},
+ {VKD3D_SM1_REG_LABEL, VKD3DSPR_LABEL},
+ {VKD3D_SM1_REG_PREDICATE, VKD3DSPR_PREDICATE},
+};
+
static const enum vkd3d_shader_resource_type resource_type_table[] =
{
/* VKD3D_SM1_RESOURCE_UNKNOWN */ VKD3D_SHADER_RESOURCE_NONE,
@@ -458,6 +507,7 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_
switch (reg_type)
{
case VKD3DSPR_DEPTHOUT:
+ case VKD3DSPR_ADDR:
return 0;
default:
@@ -465,52 +515,82 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_
}
}
-static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr,
- struct vkd3d_shader_src_param *src)
+static enum vkd3d_shader_register_type parse_register_type(
+ struct vkd3d_shader_sm1_parser *sm1, uint32_t param, unsigned int *index_offset)
{
- enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT)
+ enum vkd3d_sm1_register_type d3dbc_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT)
| ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2);
- unsigned int idx_count = idx_count_from_reg_type(reg_type);
- vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count);
- src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
- src->reg.non_uniform = false;
- if (idx_count == 1)
+ *index_offset = 0;
+
+ if (d3dbc_type == VKD3D_SM1_REG_CONST2)
{
- src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
- src->reg.idx[0].rel_addr = rel_addr;
+ *index_offset = 2048;
+ return VKD3DSPR_CONST;
}
- if (src->reg.type == VKD3DSPR_SAMPLER)
- src->reg.dimension = VSIR_DIMENSION_NONE;
- else if (src->reg.type == VKD3DSPR_DEPTHOUT)
- src->reg.dimension = VSIR_DIMENSION_SCALAR;
- else
- src->reg.dimension = VSIR_DIMENSION_VEC4;
- src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT);
- src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT;
+
+ if (d3dbc_type == VKD3D_SM1_REG_CONST3)
+ {
+ *index_offset = 4096;
+ return VKD3DSPR_CONST;
+ }
+
+ if (d3dbc_type == VKD3D_SM1_REG_CONST4)
+ {
+ *index_offset = 6144;
+ return VKD3DSPR_CONST;
+ }
+
+ if (d3dbc_type == VKD3D_SM1_REG_ADDR)
+ return sm1->p.program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? VKD3DSPR_TEXTURE : VKD3DSPR_ADDR;
+ if (d3dbc_type == VKD3D_SM1_REG_TEXCRDOUT)
+ return vkd3d_shader_ver_ge(&sm1->p.program->shader_version, 3, 0) ? VKD3DSPR_OUTPUT : VKD3DSPR_TEXCRDOUT;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i)
+ {
+ if (register_types[i].d3dbc_type == d3dbc_type)
+ return register_types[i].vsir_type;
+ }
+
+ return VKD3DSPR_INVALID;
}
-static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr,
- struct vkd3d_shader_dst_param *dst)
+static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc,
+ struct vkd3d_shader_register *reg, uint32_t param, struct vkd3d_shader_src_param *rel_addr)
{
- enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT)
- | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2);
- unsigned int idx_count = idx_count_from_reg_type(reg_type);
+ enum vkd3d_shader_register_type reg_type;
+ unsigned int index_offset, idx_count;
- vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count);
- dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
- dst->reg.non_uniform = false;
+ reg_type = parse_register_type(d3dbc, param, &index_offset);
+ idx_count = idx_count_from_reg_type(reg_type);
+ vsir_register_init(reg, reg_type, VKD3D_DATA_FLOAT, idx_count);
+ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
+ reg->non_uniform = false;
if (idx_count == 1)
{
- dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
- dst->reg.idx[0].rel_addr = rel_addr;
+ reg->idx[0].offset = index_offset + (param & VKD3D_SM1_REGISTER_NUMBER_MASK);
+ reg->idx[0].rel_addr = rel_addr;
}
- if (dst->reg.type == VKD3DSPR_SAMPLER)
- dst->reg.dimension = VSIR_DIMENSION_NONE;
- else if (dst->reg.type == VKD3DSPR_DEPTHOUT)
- dst->reg.dimension = VSIR_DIMENSION_SCALAR;
+ if (reg->type == VKD3DSPR_SAMPLER)
+ reg->dimension = VSIR_DIMENSION_NONE;
+ else if (reg->type == VKD3DSPR_DEPTHOUT)
+ reg->dimension = VSIR_DIMENSION_SCALAR;
else
- dst->reg.dimension = VSIR_DIMENSION_VEC4;
+ reg->dimension = VSIR_DIMENSION_VEC4;
+}
+
+static void shader_sm1_parse_src_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param,
+ struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_src_param *src)
+{
+ d3dbc_parse_register(sm1, &src->reg, param, rel_addr);
+ src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT);
+ src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT;
+}
+
+static void shader_sm1_parse_dst_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param,
+ struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_dst_param *dst)
+{
+ d3dbc_parse_register(sm1, &dst->reg, param, rel_addr);
dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT;
dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT;
@@ -638,7 +718,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output,
return;
}
+ /* Normally VSIR mandates that the register mask is a subset of the usage
+ * mask, and the usage mask is a subset of the signature mask. This
+ * doesn't always happen with SM1-3 registers, because of the limited
+ * flexibility in expressing swizzles.
+ *
+ * For example it's easy to find shaders like this:
+ * ps_3_0
+ * [...]
+ * dcl_texcoord0 v0
+ * [...]
+ * texld r2.xyzw, v0.xyzw, s1.xyzw
+ * [...]
+ *
+ * The dcl_texcoord0 instruction secretly has a .xy mask, which is used to
+ * compute the signature mask, but the texld instruction apparently uses all
+ * the components. Of course the last two components are ignored, but
+ * formally they seem to be used. So we end up with a signature element with
+ * mask .xy and usage mask .xyzw.
+ *
+ * In order to avoid this problem, when generating VSIR code at the SM4
+ * normalisation level we remove the unused components from the write mask. We
+ * don't do that when targeting the SM1 normalisation level (i.e., when
+ * disassembling) so as to generate the same disassembly code as native. */
element->used_mask |= mask;
+ if (program->normalisation_level >= VSIR_NORMALISED_SM4)
+ element->used_mask &= element->mask;
}
static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1,
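
A worked version of the example in the comment above (illustrative mask values only):

#include <assert.h>

static void example_used_mask_fixup(void)
{
    unsigned int mask = 0x3;      /* .xy, from dcl_texcoord0 */
    unsigned int used_mask = 0xf; /* .xyzw, from texld */

    used_mask &= mask; /* the SM4-level fix-up performed above */
    assert(used_mask == 0x3);
}
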
@@ -666,26 +771,18 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *
VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask);
case VKD3DSPR_TEXTURE:
- /* For vertex shaders, this is ADDR. */
- if (version->type == VKD3D_SHADER_TYPE_VERTEX)
- return true;
return add_signature_element(sm1, false, "TEXCOORD", register_index,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
+ case VKD3DSPR_TEXCRDOUT:
+ return add_signature_element(sm1, true, "TEXCOORD", register_index,
+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
+
case VKD3DSPR_OUTPUT:
if (version->type == VKD3D_SHADER_TYPE_VERTEX)
{
- /* For sm < 2 vertex shaders, this is TEXCRDOUT.
- *
- * For sm3 vertex shaders, this is OUTPUT, but we already
- * should have had a DCL instruction. */
- if (version->major == 3)
- {
- add_signature_mask(sm1, true, register_index, mask);
- return true;
- }
- return add_signature_element(sm1, true, "TEXCOORD", register_index,
- VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
+ add_signature_mask(sm1, true, register_index, mask);
+ return true;
}
/* fall through */
@@ -822,18 +919,6 @@ static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1,
record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def);
break;
- case VKD3DSPR_CONST2:
- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def);
- break;
-
- case VKD3DSPR_CONST3:
- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def);
- break;
-
- case VKD3DSPR_CONST4:
- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def);
- break;
-
case VKD3DSPR_CONSTINT:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def);
break;
@@ -941,9 +1026,9 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const
sm1->abort = true;
return;
}
- shader_sm1_parse_src_param(addr_token, NULL, src_rel_addr);
+ shader_sm1_parse_src_param(sm1, addr_token, NULL, src_rel_addr);
}
- shader_sm1_parse_src_param(token, src_rel_addr, src_param);
+ shader_sm1_parse_src_param(sm1, token, src_rel_addr, src_param);
}
static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr,
@@ -962,12 +1047,14 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const
sm1->abort = true;
return;
}
- shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr);
+ shader_sm1_parse_src_param(sm1, addr_token, NULL, dst_rel_addr);
}
- shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param);
+ shader_sm1_parse_dst_param(sm1, token, dst_rel_addr, dst_param);
if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE)
sm1->p.program->has_point_size = true;
+ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG)
+ sm1->p.program->has_fog = true;
}
static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1,
@@ -1005,7 +1092,7 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1,
semantic->resource_data_type[1] = VKD3D_DATA_FLOAT;
semantic->resource_data_type[2] = VKD3D_DATA_FLOAT;
semantic->resource_data_type[3] = VKD3D_DATA_FLOAT;
- shader_sm1_parse_dst_param(dst_token, NULL, &semantic->resource.reg);
+ shader_sm1_parse_dst_param(sm1, dst_token, NULL, &semantic->resource.reg);
range = &semantic->resource.range;
range->space = 0;
range->first = range->last = semantic->resource.reg.reg.idx[0].offset;
@@ -1268,6 +1355,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
{
const struct vkd3d_shader_location location = {.source_name = compile_info->source_name};
+ enum vsir_normalisation_level normalisation_level;
const uint32_t *code = compile_info->source.code;
size_t code_size = compile_info->source.size;
struct vkd3d_shader_version version;
@@ -1318,9 +1406,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st
sm1->start = &code[1];
sm1->end = &code[token_count];
+ normalisation_level = VSIR_NORMALISED_SM1;
+ if (compile_info->target_type != VKD3D_SHADER_TARGET_D3D_ASM)
+ normalisation_level = VSIR_NORMALISED_SM4;
+
/* Estimate instruction count to avoid reallocation in most shaders. */
if (!vsir_program_init(program, compile_info, &version,
- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level))
return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name);
@@ -1525,555 +1617,208 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns
type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS);
}
-D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir(
+ struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode)
{
- switch (type->class)
- {
- case HLSL_CLASS_ARRAY:
- return hlsl_sm1_class(type->e.array.type);
- case HLSL_CLASS_MATRIX:
- VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
- return D3DXPC_MATRIX_COLUMNS;
- else
- return D3DXPC_MATRIX_ROWS;
- case HLSL_CLASS_SCALAR:
- return D3DXPC_SCALAR;
- case HLSL_CLASS_STRUCT:
- return D3DXPC_STRUCT;
- case HLSL_CLASS_VECTOR:
- return D3DXPC_VECTOR;
- case HLSL_CLASS_PIXEL_SHADER:
- case HLSL_CLASS_SAMPLER:
- case HLSL_CLASS_STRING:
- case HLSL_CLASS_TEXTURE:
- case HLSL_CLASS_VERTEX_SHADER:
- return D3DXPC_OBJECT;
- case HLSL_CLASS_DEPTH_STENCIL_STATE:
- case HLSL_CLASS_DEPTH_STENCIL_VIEW:
- case HLSL_CLASS_EFFECT_GROUP:
- case HLSL_CLASS_ERROR:
- case HLSL_CLASS_PASS:
- case HLSL_CLASS_RASTERIZER_STATE:
- case HLSL_CLASS_RENDER_TARGET_VIEW:
- case HLSL_CLASS_TECHNIQUE:
- case HLSL_CLASS_UAV:
- case HLSL_CLASS_VOID:
- case HLSL_CLASS_CONSTANT_BUFFER:
- case HLSL_CLASS_COMPUTE_SHADER:
- case HLSL_CLASS_DOMAIN_SHADER:
- case HLSL_CLASS_HULL_SHADER:
- case HLSL_CLASS_GEOMETRY_SHADER:
- case HLSL_CLASS_BLEND_STATE:
- case HLSL_CLASS_NULL:
- break;
- }
-
- vkd3d_unreachable();
-}
+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
+ const struct vkd3d_sm1_opcode_info *info;
+ unsigned int i = 0;
-D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type)
-{
- switch (type->class)
+ for (;;)
{
- case HLSL_CLASS_SCALAR:
- case HLSL_CLASS_VECTOR:
- case HLSL_CLASS_MATRIX:
- switch (type->e.numeric.type)
- {
- case HLSL_TYPE_BOOL:
- return D3DXPT_BOOL;
- /* Actually double behaves differently depending on DLL version:
- * For <= 36, it maps to D3DXPT_FLOAT.
- * For 37-40, it maps to zero (D3DXPT_VOID).
- * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_*
- * values are mostly compatible with D3DXPT_*).
- * However, the latter two cases look like bugs, and a reasonable
- * application certainly wouldn't know what to do with them.
- * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */
- case HLSL_TYPE_DOUBLE:
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_HALF:
- return D3DXPT_FLOAT;
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- return D3DXPT_INT;
- default:
- vkd3d_unreachable();
- }
-
- case HLSL_CLASS_SAMPLER:
- switch (type->sampler_dim)
- {
- case HLSL_SAMPLER_DIM_1D:
- return D3DXPT_SAMPLER1D;
- case HLSL_SAMPLER_DIM_2D:
- return D3DXPT_SAMPLER2D;
- case HLSL_SAMPLER_DIM_3D:
- return D3DXPT_SAMPLER3D;
- case HLSL_SAMPLER_DIM_CUBE:
- return D3DXPT_SAMPLERCUBE;
- case HLSL_SAMPLER_DIM_GENERIC:
- return D3DXPT_SAMPLER;
- default:
- ERR("Invalid dimension %#x.\n", type->sampler_dim);
- vkd3d_unreachable();
- }
- break;
-
- case HLSL_CLASS_TEXTURE:
- switch (type->sampler_dim)
- {
- case HLSL_SAMPLER_DIM_1D:
- return D3DXPT_TEXTURE1D;
- case HLSL_SAMPLER_DIM_2D:
- return D3DXPT_TEXTURE2D;
- case HLSL_SAMPLER_DIM_3D:
- return D3DXPT_TEXTURE3D;
- case HLSL_SAMPLER_DIM_CUBE:
- return D3DXPT_TEXTURECUBE;
- case HLSL_SAMPLER_DIM_GENERIC:
- return D3DXPT_TEXTURE;
- default:
- ERR("Invalid dimension %#x.\n", type->sampler_dim);
- vkd3d_unreachable();
- }
- break;
+ info = &d3dbc->opcode_table[i++];
+ if (info->vkd3d_opcode == VKD3DSIH_INVALID)
+ return NULL;
- case HLSL_CLASS_ARRAY:
- return hlsl_sm1_base_type(type->e.array.type);
-
- case HLSL_CLASS_STRUCT:
- return D3DXPT_VOID;
-
- case HLSL_CLASS_STRING:
- return D3DXPT_STRING;
-
- case HLSL_CLASS_PIXEL_SHADER:
- return D3DXPT_PIXELSHADER;
-
- case HLSL_CLASS_VERTEX_SHADER:
- return D3DXPT_VERTEXSHADER;
-
- case HLSL_CLASS_DEPTH_STENCIL_STATE:
- case HLSL_CLASS_DEPTH_STENCIL_VIEW:
- case HLSL_CLASS_EFFECT_GROUP:
- case HLSL_CLASS_ERROR:
- case HLSL_CLASS_PASS:
- case HLSL_CLASS_RASTERIZER_STATE:
- case HLSL_CLASS_RENDER_TARGET_VIEW:
- case HLSL_CLASS_TECHNIQUE:
- case HLSL_CLASS_UAV:
- case HLSL_CLASS_VOID:
- case HLSL_CLASS_CONSTANT_BUFFER:
- case HLSL_CLASS_COMPUTE_SHADER:
- case HLSL_CLASS_DOMAIN_SHADER:
- case HLSL_CLASS_HULL_SHADER:
- case HLSL_CLASS_GEOMETRY_SHADER:
- case HLSL_CLASS_BLEND_STATE:
- case HLSL_CLASS_NULL:
- break;
+ if (vkd3d_opcode == info->vkd3d_opcode
+ && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor)
+ && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor)
+ || !info->max_version.major))
+ return info;
}
-
- vkd3d_unreachable();
}
-static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start)
+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction(
+ struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
{
- const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
- unsigned int array_size = hlsl_get_multiarray_size(type);
- unsigned int field_count = 0;
- size_t fields_offset = 0;
- size_t i;
-
- if (type->bytecode_offset)
- return;
+ const struct vkd3d_sm1_opcode_info *info;
- if (array_type->class == HLSL_CLASS_STRUCT)
+ if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode)))
{
- field_count = array_type->e.record.field_count;
-
- for (i = 0; i < field_count; ++i)
- {
- struct hlsl_struct_field *field = &array_type->e.record.fields[i];
-
- field->name_bytecode_offset = put_string(buffer, field->name);
- write_sm1_type(buffer, field->type, ctab_start);
- }
-
- fields_offset = bytecode_align(buffer) - ctab_start;
-
- for (i = 0; i < field_count; ++i)
- {
- struct hlsl_struct_field *field = &array_type->e.record.fields[i];
-
- put_u32(buffer, field->name_bytecode_offset - ctab_start);
- put_u32(buffer, field->type->bytecode_offset - ctab_start);
- }
+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE,
+ "Opcode %#x not supported for shader profile.", ins->opcode);
+ d3dbc->failed = true;
+ return NULL;
}
- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type)));
- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx));
- put_u32(buffer, vkd3d_make_u32(array_size, field_count));
- put_u32(buffer, fields_offset);
-}
-
-static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort)
-{
- struct hlsl_ir_var *var;
-
- list_remove(&to_sort->extern_entry);
-
- LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
+ if (ins->dst_count != info->dst_count)
{
- if (strcmp(to_sort->name, var->name) < 0)
- {
- list_add_before(&var->extern_entry, &to_sort->extern_entry);
- return;
- }
+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT,
+ "Invalid destination count %u for vsir instruction %#x (expected %u).",
+ ins->dst_count, ins->opcode, info->dst_count);
+ d3dbc->failed = true;
+ return NULL;
}
-
- list_add_tail(sorted, &to_sort->extern_entry);
-}
-
-static void sm1_sort_externs(struct hlsl_ctx *ctx)
-{
- struct list sorted = LIST_INIT(sorted);
- struct hlsl_ir_var *var, *next;
-
- LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ if (ins->src_count != info->src_count)
{
- if (var->is_uniform)
- sm1_sort_extern(&sorted, var);
+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT,
+ "Invalid source count %u for vsir instruction %#x (expected %u).",
+ ins->src_count, ins->opcode, info->src_count);
+ d3dbc->failed = true;
+ return NULL;
}
- list_move_tail(&ctx->extern_vars, &sorted);
+
+ return info;
}
-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
+static void d3dbc_write_comment(struct d3dbc_compiler *d3dbc,
+ uint32_t tag, const struct vkd3d_shader_code *comment)
{
- size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset;
- unsigned int uniform_count = 0;
- struct hlsl_ir_var *var;
-
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
- unsigned int r;
-
- for (r = 0; r <= HLSL_REGSET_LAST; ++r)
- {
- if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
- continue;
-
- ++uniform_count;
-
- if (var->is_param && var->is_uniform)
- {
- char *new_name;
-
- if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name)))
- return;
- vkd3d_free((char *)var->name);
- var->name = new_name;
- }
- }
- }
-
- sm1_sort_externs(ctx);
+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
+ size_t offset, start, end;
- size_offset = put_u32(buffer, 0);
- ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B'));
+ offset = put_u32(buffer, 0);
- ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */
- creator_offset = put_u32(buffer, 0);
- put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
- put_u32(buffer, uniform_count);
- vars_offset = put_u32(buffer, 0);
- put_u32(buffer, 0); /* FIXME: flags */
- put_u32(buffer, 0); /* FIXME: target string */
+ start = put_u32(buffer, tag);
+ bytecode_put_bytes(buffer, comment->code, comment->size);
+ end = bytecode_align(buffer);
- vars_start = bytecode_align(buffer);
- set_u32(buffer, vars_offset, vars_start - ctab_start);
+ set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t)));
+}
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct vkd3d_shader_register *reg)
+{
+ if (reg->type == VKD3DSPR_CONST)
{
- unsigned int r;
-
- for (r = 0; r <= HLSL_REGSET_LAST; ++r)
- {
- if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
- continue;
-
- put_u32(buffer, 0); /* name */
- if (r == HLSL_REGSET_NUMERIC)
- {
- put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id));
- put_u32(buffer, var->bind_count[r]);
- }
- else
- {
- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index));
- put_u32(buffer, var->bind_count[r]);
- }
- put_u32(buffer, 0); /* type */
- put_u32(buffer, 0); /* default value */
- }
+ if (reg->idx[0].offset >= 6144)
+ return VKD3D_SM1_REG_CONST4;
+ if (reg->idx[0].offset >= 4096)
+ return VKD3D_SM1_REG_CONST3;
+ if (reg->idx[0].offset >= 2048)
+ return VKD3D_SM1_REG_CONST2;
}
- uniform_count = 0;
-
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i)
{
- unsigned int r;
-
- for (r = 0; r <= HLSL_REGSET_LAST; ++r)
- {
- size_t var_offset, name_offset;
-
- if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
- continue;
-
- var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t));
-
- name_offset = put_string(buffer, var->name);
- set_u32(buffer, var_offset, name_offset - ctab_start);
-
- write_sm1_type(buffer, var->data_type, ctab_start);
- set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
-
- if (var->default_values)
- {
- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
- unsigned int comp_count = hlsl_type_component_count(var->data_type);
- unsigned int default_value_offset;
- unsigned int k;
-
- default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t));
- set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start);
-
- for (k = 0; k < comp_count; ++k)
- {
- struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
- unsigned int comp_offset;
- enum hlsl_regset regset;
-
- comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
- if (regset == HLSL_REGSET_NUMERIC)
- {
- union
- {
- uint32_t u;
- float f;
- } uni;
-
- switch (comp_type->e.numeric.type)
- {
- case HLSL_TYPE_DOUBLE:
- if (ctx->double_as_float_alias)
- uni.u = var->default_values[k].number.u;
- else
- uni.u = 0;
- break;
-
- case HLSL_TYPE_INT:
- uni.f = var->default_values[k].number.i;
- break;
-
- case HLSL_TYPE_UINT:
- case HLSL_TYPE_BOOL:
- uni.f = var->default_values[k].number.u;
- break;
-
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- uni.u = var->default_values[k].number.u;
- break;
-
- default:
- vkd3d_unreachable();
- }
-
- set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u);
- }
- }
- }
-
- ++uniform_count;
- }
+ if (register_types[i].vsir_type == reg->type)
+ return register_types[i].d3dbc_type;
}
- offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
- set_u32(buffer, creator_offset, offset - ctab_start);
-
- ctab_end = bytecode_align(buffer);
- set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t)));
+ vkd3d_unreachable();
}
-static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type)
+static uint32_t sm1_encode_register_type(const struct vkd3d_shader_register *reg)
{
- return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK)
- | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2);
+ enum vkd3d_sm1_register_type sm1_type = d3dbc_register_type_from_vsir(reg);
+
+ return ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK)
+ | ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2);
}
-struct sm1_instruction
+static uint32_t swizzle_from_vsir(uint32_t swizzle)
{
- enum vkd3d_sm1_opcode opcode;
- unsigned int flags;
-
- struct sm1_dst_register
- {
- enum vkd3d_shader_register_type type;
- enum vkd3d_shader_dst_modifier mod;
- unsigned int writemask;
- uint32_t reg;
- } dst;
+ uint32_t x = vsir_swizzle_get_component(swizzle, 0);
+ uint32_t y = vsir_swizzle_get_component(swizzle, 1);
+ uint32_t z = vsir_swizzle_get_component(swizzle, 2);
+ uint32_t w = vsir_swizzle_get_component(swizzle, 3);
- struct sm1_src_register
- {
- enum vkd3d_shader_register_type type;
- enum vkd3d_shader_src_modifier mod;
- unsigned int swizzle;
- uint32_t reg;
- } srcs[4];
- unsigned int src_count;
+ if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u)
+ ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle);
- unsigned int has_dst;
-};
+ return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0))
+ | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1))
+ | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2))
+ | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3));
+}
-static bool is_inconsequential_instr(const struct sm1_instruction *instr)
+static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins)
{
- const struct sm1_src_register *src = &instr->srcs[0];
- const struct sm1_dst_register *dst = &instr->dst;
+ const struct vkd3d_shader_dst_param *dst = &ins->dst[0];
+ const struct vkd3d_shader_src_param *src = &ins->src[0];
unsigned int i;
- if (instr->opcode != VKD3D_SM1_OP_MOV)
+ if (ins->opcode != VKD3DSIH_MOV)
return false;
- if (dst->mod != VKD3DSPDM_NONE)
+ if (dst->modifiers != VKD3DSPDM_NONE)
return false;
- if (src->mod != VKD3DSPSM_NONE)
+ if (src->modifiers != VKD3DSPSM_NONE)
return false;
- if (src->type != dst->type)
+ if (src->reg.type != dst->reg.type)
return false;
- if (src->reg != dst->reg)
+ if (src->reg.idx[0].offset != dst->reg.idx[0].offset)
return false;
for (i = 0; i < 4; ++i)
{
- if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i))
+ if ((dst->write_mask & (1u << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i))
return false;
}
return true;
}
-static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg)
+static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg)
{
- VKD3D_ASSERT(reg->writemask);
+ VKD3D_ASSERT(reg->write_mask);
put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER
- | sm1_encode_register_type(reg->type)
- | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT)
- | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg);
+ | sm1_encode_register_type(&reg->reg)
+ | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT)
+ | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT)
+ | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK));
}
-static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer,
- const struct sm1_src_register *reg)
+static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg)
{
put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER
- | sm1_encode_register_type(reg->type)
- | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT)
- | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg);
+ | sm1_encode_register_type(&reg->reg)
+ | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT)
+ | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT)
+ | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK));
}
-static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr)
+static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
{
const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
- uint32_t token = instr->opcode;
+ const struct vkd3d_sm1_opcode_info *info;
unsigned int i;
+ uint32_t token;
+
+ if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins)))
+ return;
- if (is_inconsequential_instr(instr))
+ if (is_inconsequential_instr(ins))
return;
- token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT);
+ token = info->sm1_opcode;
+ token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (ins->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT);
if (version->major > 1)
- token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
+ token |= (ins->dst_count + ins->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
put_u32(buffer, token);
- if (instr->has_dst)
- write_sm1_dst_register(buffer, &instr->dst);
-
- for (i = 0; i < instr->src_count; ++i)
- write_sm1_src_register(buffer, &instr->srcs[i]);
-};
-
-static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir(
- struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode)
-{
- const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
- const struct vkd3d_sm1_opcode_info *info;
- unsigned int i = 0;
-
- for (;;)
+ for (i = 0; i < ins->dst_count; ++i)
{
- info = &d3dbc->opcode_table[i++];
- if (info->vkd3d_opcode == VKD3DSIH_INVALID)
- return NULL;
-
- if (vkd3d_opcode == info->vkd3d_opcode
- && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor)
- && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor)
- || !info->max_version.major))
- return info;
- }
-}
-
-static uint32_t swizzle_from_vsir(uint32_t swizzle)
-{
- uint32_t x = vsir_swizzle_get_component(swizzle, 0);
- uint32_t y = vsir_swizzle_get_component(swizzle, 1);
- uint32_t z = vsir_swizzle_get_component(swizzle, 2);
- uint32_t w = vsir_swizzle_get_component(swizzle, 3);
-
- if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u)
- ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle);
-
- return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0))
- | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1))
- | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2))
- | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3));
-}
-
-static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_src_param *param,
- struct sm1_src_register *src, const struct vkd3d_shader_location *loc)
-{
- src->mod = param->modifiers;
- src->reg = param->reg.idx[0].offset;
- src->type = param->reg.type;
- src->swizzle = swizzle_from_vsir(param->swizzle);
-
- if (param->reg.idx[0].rel_addr)
- {
- vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
- "Unhandled relative addressing on source register.");
- d3dbc->failed = true;
+ if (ins->dst[i].reg.idx[0].rel_addr)
+ {
+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
+ "Unhandled relative addressing on destination register.");
+ d3dbc->failed = true;
+ }
+ write_sm1_dst_register(buffer, &ins->dst[i]);
}
-}
-
-static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param,
- struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc)
-{
- dst->mod = param->modifiers;
- dst->reg = param->reg.idx[0].offset;
- dst->type = param->reg.type;
- dst->writemask = param->write_mask;
- if (param->reg.idx[0].rel_addr)
+ for (i = 0; i < ins->src_count; ++i)
{
- vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
- "Unhandled relative addressing on destination register.");
- d3dbc->failed = true;
+ if (ins->src[i].reg.idx[0].rel_addr)
+ {
+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
+ "Unhandled relative addressing on source register.");
+ d3dbc->failed = true;
+ }
+ write_sm1_src_register(buffer, &ins->src[i]);
}
-}
+};
static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
{
@@ -2081,11 +1826,11 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3
struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
uint32_t token;
- const struct sm1_dst_register reg =
+ const struct vkd3d_shader_dst_param reg =
{
- .type = VKD3DSPR_CONST,
- .writemask = VKD3DSP_WRITEMASK_ALL,
- .reg = ins->dst[0].reg.idx[0].offset,
+ .reg.type = VKD3DSPR_CONST,
+ .write_mask = VKD3DSP_WRITEMASK_ALL,
+ .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset,
};
token = VKD3D_SM1_OP_DEF;
@@ -2103,7 +1848,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc,
{
const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
- struct sm1_dst_register reg = {0};
+ struct vkd3d_shader_dst_param reg = {0};
uint32_t token;
token = VKD3D_SM1_OP_DCL;
@@ -2115,9 +1860,9 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc,
token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT;
put_u32(buffer, token);
- reg.type = VKD3DSPR_COMBINED_SAMPLER;
- reg.writemask = VKD3DSP_WRITEMASK_ALL;
- reg.reg = reg_id;
+ reg.reg.type = VKD3DSPR_COMBINED_SAMPLER;
+ reg.write_mask = VKD3DSP_WRITEMASK_ALL;
+ reg.reg.idx[0].offset = reg_id;
write_sm1_dst_register(buffer, &reg);
}
@@ -2163,61 +1908,6 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3
}
}
-static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction(
- struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
-{
- const struct vkd3d_sm1_opcode_info *info;
-
- if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode)))
- {
- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE,
- "Opcode %#x not supported for shader profile.", ins->opcode);
- d3dbc->failed = true;
- return NULL;
- }
-
- if (ins->dst_count != info->dst_count)
- {
- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT,
- "Invalid destination count %u for vsir instruction %#x (expected %u).",
- ins->dst_count, ins->opcode, info->dst_count);
- d3dbc->failed = true;
- return NULL;
- }
- if (ins->src_count != info->src_count)
- {
- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT,
- "Invalid source count %u for vsir instruction %#x (expected %u).",
- ins->src_count, ins->opcode, info->src_count);
- d3dbc->failed = true;
- return NULL;
- }
-
- return info;
-}
-
-static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc,
- const struct vkd3d_shader_instruction *ins)
-{
- struct sm1_instruction instr = {0};
- const struct vkd3d_sm1_opcode_info *info;
-
- if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins)))
- return;
-
- instr.opcode = info->sm1_opcode;
- instr.flags = ins->flags;
- instr.has_dst = info->dst_count;
- instr.src_count = info->src_count;
-
- if (instr.has_dst)
- sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location);
- for (unsigned int i = 0; i < instr.src_count; ++i)
- sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location);
-
- d3dbc_write_instruction(d3dbc, &instr);
-}
-
static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
{
uint32_t writemask;
@@ -2254,7 +1944,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str
case VKD3DSIH_TEX:
case VKD3DSIH_TEXKILL:
case VKD3DSIH_TEXLDD:
- d3dbc_write_vsir_simple_instruction(d3dbc, ins);
+ d3dbc_write_instruction(d3dbc, ins);
break;
case VKD3DSIH_EXP:
@@ -2271,7 +1961,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str
writemask, ins->opcode);
d3dbc->failed = true;
}
- d3dbc_write_vsir_simple_instruction(d3dbc, ins);
+ d3dbc_write_instruction(d3dbc, ins);
break;
default:
@@ -2287,13 +1977,13 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
{
const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
- struct sm1_dst_register reg = {0};
+ struct vkd3d_shader_dst_param reg = {0};
enum vkd3d_decl_usage usage;
uint32_t token, usage_idx;
bool ret;
if (sm1_register_from_semantic_name(version, element->semantic_name,
- element->semantic_index, output, &reg.type, &reg.reg))
+ element->semantic_index, output, &reg.reg.type, &reg.reg.idx[0].offset))
{
usage = 0;
usage_idx = 0;
@@ -2302,8 +1992,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
{
ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx);
VKD3D_ASSERT(ret);
- reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
- reg.reg = element->register_index;
+ reg.reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
+ reg.reg.idx[0].offset = element->register_index;
}
token = VKD3D_SM1_OP_DCL;
@@ -2316,7 +2006,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT;
put_u32(buffer, token);
- reg.writemask = element->mask;
+ reg.write_mask = element->mask;
write_sm1_dst_register(buffer, &reg);
}
@@ -2384,9 +2074,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
}
put_u32(buffer, sm1_version(version->type, version->major, version->minor));
-
- bytecode_put_bytes(buffer, ctab->code, ctab->size);
-
+ d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab);
d3dbc_write_semantic_dcls(&d3dbc);
d3dbc_write_program_instructions(&d3dbc);
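Editor's note: the CTAB blob above is now emitted through d3dbc_write_comment(), which back-patches a comment token whose high word carries the payload length in dwords, exactly as the vkd3d_make_u32() call computes it. A minimal standalone sketch of that layout follows; the opcode value (the classic D3DSIO_COMMENT, 0xfffe) and the 16-bit length position are assumptions for illustration, not taken from this patch.

#include <stdint.h>
#include <string.h>

/* Assumed SM1 comment opcode; length is packed into the token's high word. */
#define SM1_COMMENT_OPCODE 0xfffeu

static size_t emit_comment(uint32_t *out, uint32_t tag, const void *data, size_t size)
{
    size_t payload_dwords = 1 + (size + 3) / 4; /* FOURCC tag + padded data */

    out[0] = SM1_COMMENT_OPCODE | (uint32_t)(payload_dwords << 16);
    out[1] = tag;
    if (size % 4)
        out[1 + payload_dwords - 1] = 0; /* zero the padding bytes first */
    memcpy(&out[2], data, size);
    return 1 + payload_dwords; /* total dwords written, token included */
}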
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c
index f6ac8e0829e..81af62f7810 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c
@@ -115,6 +115,14 @@ static uint32_t read_u32(const char **ptr)
return ret;
}
+static uint64_t read_u64(const char **ptr)
+{
+ uint64_t ret;
+ memcpy(&ret, *ptr, sizeof(ret));
+ *ptr += sizeof(ret);
+ return ret;
+}
+
static float read_float(const char **ptr)
{
union
@@ -502,6 +510,28 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
return ret;
}
+static int shdr_parse_features(const struct vkd3d_shader_dxbc_section_desc *section,
+ struct vkd3d_shader_message_context *message_context, struct vsir_features *f)
+{
+ const char *data = section->data.code;
+ const char *ptr = data;
+ uint64_t flags;
+
+ if (!require_space(0, 1, sizeof(uint64_t), section->data.size))
+ {
+ WARN("Invalid data size %#zx.\n", section->data.size);
+ vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE,
+ "SFI0 section size %zu is too small to contain flags.\n", section->data.size);
+ return VKD3D_ERROR_INVALID_ARGUMENT;
+ }
+ flags = read_u64(&ptr);
+
+ if (flags & DXBC_SFI0_REQUIRES_ROVS)
+ f->rovs = true;
+
+ return VKD3D_OK;
+}
+
static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section,
struct vkd3d_shader_message_context *message_context, void *context)
{
@@ -558,6 +588,11 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section,
desc->byte_code_size = section->data.size;
break;
+ case TAG_SFI0:
+ if ((ret = shdr_parse_features(section, message_context, &desc->features)) < 0)
+ return ret;
+ break;
+
case TAG_AON9:
TRACE("Skipping AON9 shader code chunk.\n");
break;
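Editor's note: for context on the new SFI0 handling above, the chunk payload is a single little-endian 64-bit flag word. A minimal sketch of the same check, assuming the conventional D3D_SHADER_REQUIRES_ROVS bit value (0x1000), which the patch hunk itself does not spell out:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Assumed flag value, matching d3dcommon's D3D_SHADER_REQUIRES_ROVS. */
#define SFI0_REQUIRES_ROVS 0x1000u

static bool sfi0_requires_rovs(const uint8_t *data, size_t size)
{
    uint64_t flags;

    if (size < sizeof(flags)) /* mirrors the require_space() check above */
        return false;
    memcpy(&flags, data, sizeof(flags)); /* unaligned-safe, like read_u64() */
    return flags & SFI0_REQUIRES_ROVS;
}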
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index 7099bcc9ce2..399c2b67eae 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -430,6 +430,8 @@ enum dx_intrinsic_opcode
DX_DERIV_COARSEY = 84,
DX_DERIV_FINEX = 85,
DX_DERIV_FINEY = 86,
+ DX_EVAL_SAMPLE_INDEX = 88,
+ DX_EVAL_CENTROID = 89,
DX_SAMPLE_INDEX = 90,
DX_COVERAGE = 91,
DX_THREAD_ID = 93,
@@ -3824,7 +3826,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par
}
static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind(
- enum vkd3d_shader_sysval_semantic sysval_semantic)
+ enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input)
{
switch (sysval_semantic)
{
@@ -3834,7 +3836,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind(
case VKD3D_SHADER_SV_SAMPLE_INDEX:
return VKD3DSPR_NULL;
case VKD3D_SHADER_SV_COVERAGE:
- return VKD3DSPR_COVERAGE;
+ return is_input ? VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK;
case VKD3D_SHADER_SV_DEPTH:
return VKD3DSPR_DEPTHOUT;
case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL:
@@ -3884,7 +3886,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade
param = &params[i];
if (e->register_index == UINT_MAX
- && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL)
+ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL)
{
dst_param_io_init(param, e, io_reg_type);
continue;
@@ -5098,6 +5100,53 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opc
instruction_dst_param_init_ssa_scalar(ins, sm6);
}
+static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
+ const struct sm6_value **operands, struct function_emission_state *state)
+{
+ struct vkd3d_shader_instruction *ins = state->ins;
+ struct vkd3d_shader_src_param *src_params;
+ const struct shader_signature *signature;
+ unsigned int row_index, column_index;
+ const struct signature_element *e;
+
+ row_index = sm6_value_get_constant_uint(operands[0]);
+ column_index = sm6_value_get_constant_uint(operands[2]);
+
+ signature = &sm6->p.program->input_signature;
+ if (row_index >= signature->element_count)
+ {
+ WARN("Invalid row index %u.\n", row_index);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Invalid input row index %u for an attribute evaluation.", row_index);
+ return;
+ }
+
+ e = &signature->elements[row_index];
+ if (column_index >= VKD3D_VEC4_SIZE || !(e->mask & (1 << column_index)))
+ {
+ WARN("Invalid column index %u.\n", column_index);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Invalid input column index %u for an attribute evaluation.", column_index);
+ return;
+ }
+
+ vsir_instruction_init(ins, &sm6->p.location, (op == DX_EVAL_CENTROID)
+ ? VKD3DSIH_EVAL_CENTROID : VKD3DSIH_EVAL_SAMPLE_INDEX);
+
+ if (!(src_params = instruction_src_params_alloc(ins, 1 + (op == DX_EVAL_SAMPLE_INDEX), sm6)))
+ return;
+
+ src_params[0].reg = sm6->input_params[row_index].reg;
+ src_param_init_scalar(&src_params[0], column_index);
+ if (e->register_count > 1)
+ register_index_address_init(&src_params[0].reg.idx[0], operands[1], sm6);
+
+ if (op == DX_EVAL_SAMPLE_INDEX)
+ src_param_init_from_value(&src_params[1], operands[3]);
+
+ instruction_dst_param_init_ssa_scalar(ins, sm6);
+}
+
static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
const struct sm6_value **operands, struct function_emission_state *state)
{
@@ -6288,6 +6337,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] =
[DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot},
[DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream},
[DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream},
+ [DX_EVAL_CENTROID ] = {"o", "cic", sm6_parser_emit_dx_eval_attrib},
+ [DX_EVAL_SAMPLE_INDEX ] = {"o", "cici", sm6_parser_emit_dx_eval_attrib},
[DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary},
[DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs},
[DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary},
@@ -9348,7 +9399,7 @@ static void signature_element_read_additional_element_values(struct signature_el
}
static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m,
- struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain)
+ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input)
{
unsigned int i, j, column_count, operand_count, index;
const struct sm6_metadata_node *node, *element_node;
@@ -9466,7 +9517,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
if ((is_register = e->register_index == UINT_MAX))
{
- if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID)
+ if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID)
{
WARN("Unhandled I/O register semantic kind %u.\n", j);
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
@@ -9578,17 +9629,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons
}
if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0],
- &program->input_signature, tessellator_domain)) < 0)
+ &program->input_signature, tessellator_domain, true)) < 0)
{
return ret;
}
if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1],
- &program->output_signature, tessellator_domain)) < 0)
+ &program->output_signature, tessellator_domain, false)) < 0)
{
return ret;
}
if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2],
- &program->patch_constant_signature, tessellator_domain)) < 0)
+ &program->patch_constant_signature, tessellator_domain, false)) < 0)
{
return ret;
}
@@ -9717,12 +9768,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6,
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN);
ins->declaration.tessellator_domain = tessellator_domain;
+ sm6->p.program->tess_domain = tessellator_domain;
}
-static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count,
- const char *type)
+static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6,
+ unsigned int count, bool allow_zero, const char *type)
{
- if (!count || count > 32)
+ if ((!count && !allow_zero) || count > 32)
{
WARN("%s control point count %u invalid.\n", type, count);
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES,
@@ -9744,6 +9796,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6,
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING);
ins->declaration.tessellator_partitioning = tessellator_partitioning;
+
+ sm6->p.program->tess_partitioning = tessellator_partitioning;
}
static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6,
@@ -9760,6 +9814,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE);
ins->declaration.tessellator_output_primitive = primitive;
+
+ sm6->p.program->tess_output_primitive = primitive;
}
static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m)
@@ -9951,7 +10007,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa
}
sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]);
- sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input");
+ sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input");
sm6->p.program->input_control_point_count = operands[1];
return operands[0];
@@ -10010,9 +10066,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa
}
}
- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input");
+ sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input");
program->input_control_point_count = operands[1];
- sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output");
+ sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output");
sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]);
program->output_control_point_count = operands[2];
sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]);
@@ -10351,7 +10407,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
/* Estimate instruction count to avoid reallocation in most shaders. */
count = max(token_count, 400) - 400;
if (!vsir_program_init(program, compile_info, &version,
- (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO))
+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6))
return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name);
sm6->ptr = &sm6->start[1];
@@ -10378,6 +10434,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
*input_signature = dxbc_desc->input_signature;
*output_signature = dxbc_desc->output_signature;
*patch_constant_signature = dxbc_desc->patch_constant_signature;
+ program->features = dxbc_desc->features;
memset(dxbc_desc, 0, sizeof(*dxbc_desc));
block = &sm6->root_block;
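Editor's note: the relaxed control-point validation in the dxil.c hunks above accepts a zero count only where allow_zero is set (domain-shader inputs, whose authoritative count comes from the hull shader); hull shader inputs and outputs still require 1..32. A one-function sketch of that rule:

#include <stdbool.h>

static bool control_point_count_valid(unsigned int count, bool allow_zero)
{
    if (!count)
        return allow_zero; /* only domain-shader inputs may be 0 */
    return count <= 32;
}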
diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
index e22177e1e30..779ffa1e156 100644
--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
@@ -36,6 +36,16 @@ struct fx_4_binary_type
uint32_t typeinfo;
};
+struct fx_5_shader
+{
+ uint32_t offset;
+ uint32_t sodecl[4];
+ uint32_t sodecl_count;
+ uint32_t rast_stream;
+ uint32_t iface_bindings_count;
+ uint32_t iface_bindings;
+};
+
struct string_entry
{
struct rb_entry entry;
@@ -550,6 +560,8 @@ enum fx_4_type_constants
FX_4_ASSIGNMENT_VARIABLE = 0x2,
FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3,
FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4,
+ FX_4_ASSIGNMENT_INLINE_SHADER = 0x7,
+ FX_5_ASSIGNMENT_INLINE_SHADER = 0x8,
};
static const uint32_t fx_4_numeric_base_types[] =
@@ -598,8 +610,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type,
return 0;
}
- value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT;
- value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT;
+ value |= (type->e.numeric.dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT;
+ value |= (type->e.numeric.dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT;
if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK;
@@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
case HLSL_CLASS_TECHNIQUE:
case HLSL_CLASS_CONSTANT_BUFFER:
case HLSL_CLASS_NULL:
+ case HLSL_CLASS_STREAM_OUTPUT:
vkd3d_unreachable();
case HLSL_CLASS_VOID:
@@ -1008,8 +1021,8 @@ static uint32_t get_fx_2_type_class(const struct hlsl_type *type)
return hlsl_sm1_class(type);
}
-static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic,
- struct fx_write_context *fx)
+static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name,
+ const struct hlsl_semantic *semantic, bool is_combined_sampler, struct fx_write_context *fx)
{
struct vkd3d_bytecode_buffer *buffer = &fx->unstructured;
uint32_t semantic_offset, offset, elements_count = 0, name_offset;
@@ -1025,7 +1038,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
name_offset = write_string(name, fx);
semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0;
- offset = put_u32(buffer, hlsl_sm1_base_type(type));
+ offset = put_u32(buffer, hlsl_sm1_base_type(type, is_combined_sampler));
put_u32(buffer, get_fx_2_type_class(type));
put_u32(buffer, name_offset);
put_u32(buffer, semantic_offset);
@@ -1034,13 +1047,13 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
switch (type->class)
{
case HLSL_CLASS_VECTOR:
- put_u32(buffer, type->dimx);
- put_u32(buffer, type->dimy);
+ put_u32(buffer, type->e.numeric.dimx);
+ put_u32(buffer, type->e.numeric.dimy);
break;
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_MATRIX:
- put_u32(buffer, type->dimy);
- put_u32(buffer, type->dimx);
+ put_u32(buffer, type->e.numeric.dimy);
+ put_u32(buffer, type->e.numeric.dimx);
break;
case HLSL_CLASS_STRUCT:
put_u32(buffer, type->e.record.field_count);
@@ -1061,7 +1074,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
/* Validated in check_invalid_object_fields(). */
VKD3D_ASSERT(hlsl_is_numeric_type(field->type));
- write_fx_2_parameter(field->type, field->name, &field->semantic, fx);
+ write_fx_2_parameter(field->type, field->name, &field->semantic, false, fx);
}
}
@@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type
case HLSL_CLASS_TECHNIQUE:
case HLSL_CLASS_CONSTANT_BUFFER:
case HLSL_CLASS_NULL:
+ case HLSL_CLASS_STREAM_OUTPUT:
/* This cannot appear as an extern variable. */
break;
}
@@ -1321,7 +1335,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx)
if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc))
continue;
- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx);
+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx);
value_offset = write_fx_2_initial_value(var, fx);
flags = 0;
@@ -1344,7 +1358,7 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte
struct vkd3d_bytecode_buffer *buffer = &fx->structured;
uint32_t desc_offset, value_offset;
- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx);
+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx);
value_offset = write_fx_2_initial_value(var, fx);
put_u32(buffer, desc_offset);
@@ -1834,6 +1848,7 @@ enum state_property_component_type
FX_BLEND,
FX_VERTEXSHADER,
FX_PIXELSHADER,
+ FX_GEOMETRYSHADER,
FX_COMPONENT_TYPE_COUNT,
};
@@ -2065,6 +2080,7 @@ fx_4_states[] =
{ "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 },
{ "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 },
+ { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 },
{ "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 },
{ "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 },
{ "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 },
@@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en
static int fx_2_parse(struct fx_parser *parser)
{
- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n");
+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.");
return -1;
}
@@ -3120,7 +3136,7 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser)
else
{
fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA,
- "Only numeric and string types are supported in annotations.\n");
+ "Only numeric and string types are supported in annotations.");
}
if (type.element_count)
@@ -3210,27 +3226,13 @@ static void fx_parse_buffers(struct fx_parser *parser)
}
}
-static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type)
+static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader)
{
struct vkd3d_shader_compile_info info = { 0 };
struct vkd3d_shader_code output;
- uint32_t data_size, offset;
const void *data = NULL;
const char *p, *q, *end;
- struct fx_5_shader
- {
- uint32_t offset;
- uint32_t sodecl[4];
- uint32_t sodecl_count;
- uint32_t rast_stream;
- uint32_t iface_bindings_count;
- uint32_t iface_bindings;
- } shader5;
- struct fx_4_gs_so
- {
- uint32_t offset;
- uint32_t sodecl;
- } gs_so;
+ uint32_t data_size;
int ret;
static const struct vkd3d_shader_compile_option options[] =
@@ -3238,35 +3240,9 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14},
};
- switch (object_type)
- {
- case FX_4_OBJECT_TYPE_PIXEL_SHADER:
- case FX_4_OBJECT_TYPE_VERTEX_SHADER:
- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER:
- offset = fx_parser_read_u32(parser);
- break;
-
- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO:
- fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so));
- offset = gs_so.offset;
- break;
-
- case FX_5_OBJECT_TYPE_GEOMETRY_SHADER:
- case FX_5_OBJECT_TYPE_COMPUTE_SHADER:
- case FX_5_OBJECT_TYPE_HULL_SHADER:
- case FX_5_OBJECT_TYPE_DOMAIN_SHADER:
- fx_parser_read_u32s(parser, &shader5, sizeof(shader5));
- offset = shader5.offset;
- break;
-
- default:
- parser->failed = true;
- return;
- }
-
- fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size));
+ fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size));
if (data_size)
- data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size);
+ data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size);
if (!data)
return;
@@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0)
{
fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA,
- "Failed to disassemble shader blob.\n");
+ "Failed to disassemble shader blob.");
return;
}
parse_fx_print_indent(parser);
@@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
parse_fx_print_indent(parser);
vkd3d_string_buffer_printf(&parser->buffer, "}");
- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl)
+ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0])
{
vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */",
- fx_4_get_string(parser, gs_so.sodecl));
+ fx_4_get_string(parser, shader->sodecl[0]));
}
else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER)
{
- for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i)
+ for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i)
{
- if (shader5.sodecl[i])
+ if (shader->sodecl[i])
vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */",
- i, fx_4_get_string(parser, shader5.sodecl[i]));
+ i, fx_4_get_string(parser, shader->sodecl[i]));
}
- if (shader5.sodecl_count)
- vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream);
+ if (shader->sodecl_count)
+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream);
}
vkd3d_shader_free_shader_code(&output);
}
+static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type)
+{
+ struct fx_5_shader shader = { 0 };
+
+ switch (object_type)
+ {
+ case FX_4_OBJECT_TYPE_PIXEL_SHADER:
+ case FX_4_OBJECT_TYPE_VERTEX_SHADER:
+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER:
+ shader.offset = fx_parser_read_u32(parser);
+ break;
+
+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO:
+ shader.offset = fx_parser_read_u32(parser);
+ shader.sodecl[0] = fx_parser_read_u32(parser);
+ break;
+
+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER:
+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER:
+ case FX_5_OBJECT_TYPE_HULL_SHADER:
+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER:
+ fx_parser_read_u32s(parser, &shader, sizeof(shader));
+ break;
+
+ default:
+ parser->failed = true;
+ return;
+ }
+
+ fx_4_parse_shader_blob(parser, object_type, &shader);
+}
+
static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type)
{
switch (type->typeinfo)
@@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32
[FX_UINT8] = "byte",
};
const struct rhs_named_value *named_value;
+ struct fx_5_shader shader = { 0 };
+ unsigned int shader_type = 0;
uint32_t i, j, comp_count;
struct fx_4_state *state;
@@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32
if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states),
sizeof(*fx_4_states), fx_4_state_id_compare)))
{
- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id);
+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id);
break;
}
@@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32
vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name),
fx_4_get_string(parser, index.index));
break;
+ case FX_4_ASSIGNMENT_INLINE_SHADER:
+ case FX_5_ASSIGNMENT_INLINE_SHADER:
+ {
+ bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER;
+
+ if (shader5)
+ fx_parser_read_unstructured(parser, &shader, entry.value, sizeof(shader));
+ else
+ fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t));
+
+ if (state->type == FX_PIXELSHADER)
+ shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER;
+ else if (state->type == FX_VERTEXSHADER)
+ shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER;
+ else if (state->type == FX_GEOMETRYSHADER)
+ shader_type = shader5 ? FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO;
+ else if (state->type == FX_HULLSHADER)
+ shader_type = FX_5_OBJECT_TYPE_HULL_SHADER;
+ else if (state->type == FX_DOMAINSHADER)
+ shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER;
+ else if (state->type == FX_COMPUTESHADER)
+ shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER;
+
+ vkd3d_string_buffer_printf(&parser->buffer, "\n");
+ parse_fx_start_indent(parser);
+ fx_4_parse_shader_blob(parser, shader_type, &shader);
+ parse_fx_end_indent(parser);
+ break;
+ }
default:
fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED,
- "Unsupported assignment type %u.\n", entry.type);
+ "Unsupported assignment type %u.", entry.type);
}
vkd3d_string_buffer_printf(&parser->buffer, ";\n");
}
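Editor's note: the inline-shader assignment decode above reuses one fx_5_shader record for both layouts. An fx_4 entry stores only the first two dwords (the blob offset plus a single stream-output declaration, which lands in sodecl[0]), while an fx_5 entry stores the whole record, so zero-initialising the struct before the partial read is what keeps the unread fields well defined. A minimal sketch with hypothetical names:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct inline_shader /* hypothetical stand-in for struct fx_5_shader */
{
    uint32_t offset;
    uint32_t sodecl[4];
    uint32_t sodecl_count;
    uint32_t rast_stream;
    uint32_t iface_bindings_count;
    uint32_t iface_bindings;
};

static void read_inline_shader(struct inline_shader *shader, const void *src, bool is_fx_5)
{
    memset(shader, 0, sizeof(*shader)); /* the fx_4 read leaves the tail untouched */
    memcpy(shader, src, is_fx_5 ? sizeof(*shader) : 2 * sizeof(uint32_t));
}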
diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
index 0df0e30f399..ab6604bd703 100644
--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
@@ -1507,13 +1507,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
case VKD3DSIH_DCL_INDEXABLE_TEMP:
shader_glsl_dcl_indexable_temp(gen, ins);
break;
- case VKD3DSIH_DCL_INPUT:
- case VKD3DSIH_DCL_INPUT_PS:
- case VKD3DSIH_DCL_INPUT_PS_SGV:
- case VKD3DSIH_DCL_INPUT_PS_SIV:
- case VKD3DSIH_DCL_INPUT_SGV:
- case VKD3DSIH_DCL_OUTPUT:
- case VKD3DSIH_DCL_OUTPUT_SIV:
case VKD3DSIH_NOP:
break;
case VKD3DSIH_DEFAULT:
@@ -2476,7 +2469,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags,
if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0)
return ret;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
vkd3d_glsl_generator_init(&generator, program, compile_info,
descriptor_info, combined_sampler_info, message_context);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 96de18dc886..48d9d4e0023 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -192,18 +192,20 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type)
unsigned int hlsl_type_minor_size(const struct hlsl_type *type)
{
+ VKD3D_ASSERT(hlsl_is_numeric_type(type));
if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type))
- return type->dimx;
+ return type->e.numeric.dimx;
else
- return type->dimy;
+ return type->e.numeric.dimy;
}
unsigned int hlsl_type_major_size(const struct hlsl_type *type)
{
+ VKD3D_ASSERT(hlsl_is_numeric_type(type));
if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type))
- return type->dimy;
+ return type->e.numeric.dimy;
else
- return type->dimx;
+ return type->e.numeric.dimx;
}
unsigned int hlsl_type_element_count(const struct hlsl_type *type)
@@ -211,7 +213,7 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type)
switch (type->class)
{
case HLSL_CLASS_VECTOR:
- return type->dimx;
+ return type->e.numeric.dimx;
case HLSL_CLASS_MATRIX:
return hlsl_type_major_size(type);
case HLSL_CLASS_ARRAY:
@@ -287,6 +289,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type)
case HLSL_CLASS_UAV:
case HLSL_CLASS_CONSTANT_BUFFER:
case HLSL_CLASS_BLEND_STATE:
+ case HLSL_CLASS_STREAM_OUTPUT:
case HLSL_CLASS_VOID:
case HLSL_CLASS_NULL:
return false;
@@ -354,14 +357,24 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
{
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_VECTOR:
- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->dimx : 4;
+ type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->e.numeric.dimx : 4;
break;
case HLSL_CLASS_MATRIX:
if (hlsl_type_is_row_major(type))
- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy);
+ {
+ if (is_sm4)
+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimy - 1) + type->e.numeric.dimx;
+ else
+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimy;
+ }
else
- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx);
+ {
+ if (is_sm4)
+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimx - 1) + type->e.numeric.dimy;
+ else
+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimx;
+ }
break;
case HLSL_CLASS_ARRAY:
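Editor's note: the rewritten matrix sizing above makes the two rules explicit for a row-major matrix: SM4 packs it into 4 * (rows - 1) + cols scalar slots, leaving the final row unpadded, while SM1 always allocates 4 * rows; a column-major matrix swaps rows and cols in both formulas. A small worked sketch of the row-major case:

#include <stdbool.h>

static unsigned int matrix_numeric_reg_size(unsigned int rows, unsigned int cols, bool is_sm4)
{
    if (is_sm4)
        return 4 * (rows - 1) + cols; /* last row is left unpadded */
    return 4 * rows;                  /* SM1 pads every row to a vec4 */
}

/* E.g. a row_major float3x2: matrix_numeric_reg_size(3, 2, true) == 10,
 * while the SM1 layout occupies 12 scalar slots. */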
@@ -386,7 +399,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
{
unsigned int i;
- type->dimx = 0;
for (i = 0; i < type->e.record.field_count; ++i)
{
struct hlsl_struct_field *field = &type->e.record.fields[i];
@@ -398,8 +410,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
field->reg_offset[k] = type->reg_size[k];
type->reg_size[k] += field->type->reg_size[k];
}
-
- type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type);
}
break;
}
@@ -434,6 +444,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
case HLSL_CLASS_HULL_SHADER:
case HLSL_CLASS_GEOMETRY_SHADER:
case HLSL_CLASS_BLEND_STATE:
+ case HLSL_CLASS_STREAM_OUTPUT:
case HLSL_CLASS_NULL:
break;
}
@@ -481,8 +492,8 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e
}
type->class = type_class;
type->e.numeric.type = base_type;
- type->dimx = dimx;
- type->dimy = dimy;
+ type->e.numeric.dimx = dimx;
+ type->e.numeric.dimy = dimy;
hlsl_type_calculate_reg_size(ctx, type);
list_add_tail(&ctx->types, &type->entry);
@@ -525,6 +536,7 @@ static bool type_is_single_component(const struct hlsl_type *type)
case HLSL_CLASS_PASS:
case HLSL_CLASS_TECHNIQUE:
case HLSL_CLASS_VOID:
+ case HLSL_CLASS_STREAM_OUTPUT:
break;
}
vkd3d_unreachable();
@@ -549,18 +561,19 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx,
switch (type->class)
{
case HLSL_CLASS_VECTOR:
- VKD3D_ASSERT(index < type->dimx);
+ VKD3D_ASSERT(index < type->e.numeric.dimx);
*type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type);
*index_ptr = 0;
return index;
case HLSL_CLASS_MATRIX:
{
- unsigned int y = index / type->dimx, x = index % type->dimx;
+ unsigned int y = index / type->e.numeric.dimx, x = index % type->e.numeric.dimx;
bool row_major = hlsl_type_is_row_major(type);
- VKD3D_ASSERT(index < type->dimx * type->dimy);
- *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy);
+ VKD3D_ASSERT(index < type->e.numeric.dimx * type->e.numeric.dimy);
+ *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type,
+ row_major ? type->e.numeric.dimx : type->e.numeric.dimy);
*index_ptr = row_major ? x : y;
return row_major ? y : x;
}
@@ -680,6 +693,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_CONSTANT_BUFFER:
case HLSL_CLASS_NULL:
+ case HLSL_CLASS_STREAM_OUTPUT:
vkd3d_unreachable();
}
type = next_type;
@@ -857,9 +871,9 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co
case HLSL_CLASS_MATRIX:
if (hlsl_type_is_row_major(type))
- return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx);
+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx);
else
- return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy);
+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimy);
case HLSL_CLASS_ARRAY:
return type->e.array.type;
@@ -888,8 +902,6 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba
type->modifiers = basic_type->modifiers;
type->e.array.elements_count = array_size;
type->e.array.type = basic_type;
- type->dimx = basic_type->dimx;
- type->dimy = basic_type->dimy;
type->sampler_dim = basic_type->sampler_dim;
hlsl_type_calculate_reg_size(ctx, type);
@@ -898,6 +910,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba
return type;
}
+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx,
+ enum hlsl_so_object_type so_type, struct hlsl_type *data_type)
+{
+ struct hlsl_type *type;
+
+ if (!(type = hlsl_alloc(ctx, sizeof(*type))))
+ return NULL;
+ type->class = HLSL_CLASS_STREAM_OUTPUT;
+ type->e.so.so_type = so_type;
+ type->e.so.type = data_type;
+
+ list_add_tail(&ctx->types, &type->entry);
+
+ return type;
+}
+
struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
struct hlsl_struct_field *fields, size_t field_count)
{
@@ -907,7 +935,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
return NULL;
type->class = HLSL_CLASS_STRUCT;
type->name = name;
- type->dimy = 1;
type->e.record.fields = fields;
type->e.record.field_count = field_count;
hlsl_type_calculate_reg_size(ctx, type);
@@ -925,8 +952,6 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_
if (!(type = hlsl_alloc(ctx, sizeof(*type))))
return NULL;
type->class = HLSL_CLASS_TEXTURE;
- type->dimx = 4;
- type->dimy = 1;
type->sampler_dim = dim;
type->e.resource.format = format;
type->sample_count = sample_count;
@@ -943,8 +968,6 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim
if (!(type = hlsl_alloc(ctx, sizeof(*type))))
return NULL;
type->class = HLSL_CLASS_UAV;
- type->dimx = format->dimx;
- type->dimy = 1;
type->sampler_dim = dim;
type->e.resource.format = format;
type->e.resource.rasteriser_ordered = rasteriser_ordered;
@@ -960,7 +983,6 @@ struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *forma
if (!(type = hlsl_alloc(ctx, sizeof(*type))))
return NULL;
type->class = HLSL_CLASS_CONSTANT_BUFFER;
- type->dimy = 1;
type->e.resource.format = format;
hlsl_type_calculate_reg_size(ctx, type);
list_add_tail(&ctx->types, &type->entry);
@@ -1046,7 +1068,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type)
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_VECTOR:
case HLSL_CLASS_MATRIX:
- return type->dimx * type->dimy;
+ return type->e.numeric.dimx * type->e.numeric.dimy;
case HLSL_CLASS_STRUCT:
{
@@ -1086,6 +1108,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type)
case HLSL_CLASS_PASS:
case HLSL_CLASS_TECHNIQUE:
case HLSL_CLASS_VOID:
+ case HLSL_CLASS_STREAM_OUTPUT:
break;
}
@@ -1110,9 +1133,9 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2
if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR)
!= (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR))
return false;
- if (t1->dimx != t2->dimx)
+ if (t1->e.numeric.dimx != t2->e.numeric.dimx)
return false;
- if (t1->dimy != t2->dimy)
+ if (t1->e.numeric.dimy != t2->e.numeric.dimy)
return false;
return true;
@@ -1157,6 +1180,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2
case HLSL_CLASS_CONSTANT_BUFFER:
return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format);
+ case HLSL_CLASS_STREAM_OUTPUT:
+ if (t1->e.so.so_type != t2->e.so.so_type)
+ return false;
+ return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type);
+
case HLSL_CLASS_DEPTH_STENCIL_STATE:
case HLSL_CLASS_DEPTH_STENCIL_VIEW:
case HLSL_CLASS_EFFECT_GROUP:
@@ -1198,8 +1226,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
}
}
type->class = old->class;
- type->dimx = old->dimx;
- type->dimy = old->dimy;
type->modifiers = old->modifiers | modifiers;
if (!(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK))
type->modifiers |= default_majority;
@@ -1212,6 +1238,8 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_VECTOR:
case HLSL_CLASS_MATRIX:
+ type->e.numeric.dimx = old->e.numeric.dimx;
+ type->e.numeric.dimy = old->e.numeric.dimy;
type->e.numeric.type = old->e.numeric.type;
break;
@@ -1471,7 +1499,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls
hlsl_src_from_node(&store->rhs, rhs);
if (!writemask && type_is_single_reg(rhs->data_type))
- writemask = (1 << rhs->data_type->dimx) - 1;
+ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1;
store->writemask = writemask;
return &store->node;
@@ -1498,7 +1526,7 @@ bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block,
hlsl_src_from_node(&store->rhs, rhs);
if (type_is_single_reg(rhs->data_type))
- store->writemask = (1 << rhs->data_type->dimx) - 1;
+ store->writemask = (1 << rhs->data_type->e.numeric.dimx) - 1;
hlsl_block_add_instr(block, &store->node);
@@ -1695,22 +1723,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *
return &s->node;
}
-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx,
- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc)
-{
- struct hlsl_ir_vsir_instruction_ref *vsir_instr;
-
- if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr))))
- return NULL;
- init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc);
- vsir_instr->vsir_instr_idx = vsir_instr_idx;
-
- if (reg)
- vsir_instr->node.reg = *reg;
-
- return &vsir_instr->node;
-}
-
struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc)
{
@@ -1844,22 +1856,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct
return &store->node;
}
-struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components,
+struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count,
struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_swizzle *swizzle;
struct hlsl_type *type;
+ VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR);
+
if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle))))
return NULL;
- VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type));
- if (components == 1)
+ if (component_count > 1)
+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count);
+ else
type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type);
+ init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc);
+ hlsl_src_from_node(&swizzle->val, val);
+ swizzle->u.vector = s;
+
+ return &swizzle->node;
+}
+
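+/* Editorial sketch, not part of the upstream patch: building "val.yx" for a
+ * vector-typed node "val" (loc assumed to be a local source location):
+ *
+ *     struct hlsl_ir_node *swiz;
+ *
+ *     if (!(swiz = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, X, X, X), 2, val, &loc)))
+ *         return NULL;
+ *
+ * Only the first component_count slots are meaningful; unused slots are
+ * conventionally left as X (0). */
+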
+struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s,
+ unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_swizzle *swizzle;
+ struct hlsl_type *type;
+
+ VKD3D_ASSERT(val->data_type->class == HLSL_CLASS_MATRIX);
+
+ if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle))))
+ return NULL;
+ if (component_count > 1)
+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count);
else
- type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components);
+ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type);
init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc);
hlsl_src_from_node(&swizzle->val, val);
- swizzle->swizzle = s;
+ swizzle->u.matrix = s;
+
return &swizzle->node;
}
@@ -1996,6 +2031,25 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch
return &constant->node;
}
+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type,
+ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value,
+ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_interlocked *interlocked;
+
+ if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked))))
+ return NULL;
+
+ init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc);
+ interlocked->op = op;
+ hlsl_copy_deref(ctx, &interlocked->dst, dst);
+ hlsl_src_from_node(&interlocked->coords, coords);
+ hlsl_src_from_node(&interlocked->cmp_value, cmp_value);
+ hlsl_src_from_node(&interlocked->value, value);
+
+ return &interlocked->node;
+}
+
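+/* Editorial sketch, not part of the upstream patch: lowering an
+ * InterlockedAdd(dest, value) that discards the original value would look
+ * roughly like this (dst_deref, coords, value and loc assumed from context):
+ *
+ *     struct hlsl_ir_node *node;
+ *
+ *     if (!(node = hlsl_new_interlocked(ctx, HLSL_INTERLOCKED_ADD, NULL,
+ *             &dst_deref, coords, NULL, value, &loc)))
+ *         return false;
+ *     hlsl_block_add_instr(block, node);
+ *
+ * A NULL type selects the no-return-value form, per the convention documented
+ * on struct hlsl_ir_interlocked; cmp_value is only set for CMP_EXCH. */
+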
bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
{
struct hlsl_type *type = index->val.node->data_type;
@@ -2031,7 +2085,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v
if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV)
type = type->e.resource.format;
else if (type->class == HLSL_CLASS_MATRIX)
- type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx);
+ type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx);
else
type = hlsl_get_element_type_from_path_index(ctx, type, idx);
@@ -2054,8 +2108,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type
return &jump->node;
}
-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type,
+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter,
+ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type,
unsigned int unroll_limit, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_loop *loop;
@@ -2066,6 +2120,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
hlsl_block_init(&loop->body);
hlsl_block_add_block(&loop->body, block);
+ hlsl_block_init(&loop->iter);
+ if (iter)
+ hlsl_block_add_block(&loop->iter, iter);
+
loop->unroll_type = unroll_type;
loop->unroll_limit = unroll_limit;
return &loop->node;
@@ -2221,14 +2279,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src)
{
+ struct hlsl_block iter, body;
struct hlsl_ir_node *dst;
- struct hlsl_block body;
+
+ if (!clone_block(ctx, &iter, &src->iter, map))
+ return NULL;
if (!clone_block(ctx, &body, &src->body, map))
+ {
+ hlsl_block_cleanup(&iter);
return NULL;
+ }
- if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc)))
+ if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc)))
{
+ hlsl_block_cleanup(&iter);
hlsl_block_cleanup(&body);
return NULL;
}
@@ -2310,8 +2375,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr
static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx,
struct clone_instr_map *map, struct hlsl_ir_swizzle *src)
{
- return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx,
- map_instr(map, src->val.node), &src->node.loc);
+ if (src->val.node->data_type->class == HLSL_CLASS_MATRIX)
+ return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->e.numeric.dimx,
+ map_instr(map, src->val.node), &src->node.loc);
+ else
+ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->e.numeric.dimx,
+ map_instr(map, src->val.node), &src->node.loc);
}
static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map,
@@ -2325,6 +2394,27 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr
return dst;
}
+static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx,
+ struct clone_instr_map *map, struct hlsl_ir_interlocked *src)
+{
+ struct hlsl_ir_interlocked *dst;
+
+ if (!(dst = hlsl_alloc(ctx, sizeof(*dst))))
+ return NULL;
+ init_node(&dst->node, HLSL_IR_INTERLOCKED, NULL, &src->node.loc);
+ dst->op = src->op;
+
+ if (!clone_deref(ctx, map, &dst->dst, &src->dst))
+ {
+ vkd3d_free(dst);
+ return NULL;
+ }
+ clone_src(map, &dst->coords, &src->coords);
+ clone_src(map, &dst->cmp_value, &src->cmp_value);
+ clone_src(map, &dst->value, &src->value);
+ return &dst->node;
+}
+
static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx,
struct clone_instr_map *map, struct hlsl_ir_compile *compile)
{
@@ -2525,6 +2615,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_SWIZZLE:
return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
+ case HLSL_IR_INTERLOCKED:
+ return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr));
+
case HLSL_IR_COMPILE:
return clone_compile(ctx, map, hlsl_ir_compile(instr));
@@ -2533,9 +2626,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_STATEBLOCK_CONSTANT:
return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr));
-
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- vkd3d_unreachable();
}
vkd3d_unreachable();
@@ -2693,10 +2783,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha
return NULL;
}
-struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type)
+static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl_type *type)
{
- struct vkd3d_string_buffer *string, *inner_string;
-
static const char *const base_types[] =
{
[HLSL_TYPE_FLOAT] = "float",
@@ -2720,31 +2808,29 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
[HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray",
};
- if (!(string = hlsl_get_string_buffer(ctx)))
- return NULL;
-
if (type->name)
{
- vkd3d_string_buffer_printf(string, "%s", type->name);
- return string;
+ vkd3d_string_buffer_printf(buffer, "%s", type->name);
+ return;
}
switch (type->class)
{
case HLSL_CLASS_SCALAR:
VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types));
- vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]);
- return string;
+ vkd3d_string_buffer_printf(buffer, "%s", base_types[type->e.numeric.type]);
+ return;
case HLSL_CLASS_VECTOR:
VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types));
- vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx);
- return string;
+ vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->e.numeric.dimx);
+ return;
case HLSL_CLASS_MATRIX:
VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types));
- vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx);
- return string;
+ vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type],
+ type->e.numeric.dimy, type->e.numeric.dimx);
+ return;
case HLSL_CLASS_ARRAY:
{
@@ -2753,88 +2839,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type)
;
- if ((inner_string = hlsl_type_to_string(ctx, t)))
- {
- vkd3d_string_buffer_printf(string, "%s", inner_string->buffer);
- hlsl_release_string_buffer(ctx, inner_string);
- }
-
+ hlsl_dump_type(buffer, t);
for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type)
{
if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT)
- vkd3d_string_buffer_printf(string, "[]");
+ vkd3d_string_buffer_printf(buffer, "[]");
else
- vkd3d_string_buffer_printf(string, "[%u]", t->e.array.elements_count);
+ vkd3d_string_buffer_printf(buffer, "[%u]", t->e.array.elements_count);
}
- return string;
+ return;
}
case HLSL_CLASS_STRUCT:
- vkd3d_string_buffer_printf(string, "<anonymous struct>");
- return string;
+ vkd3d_string_buffer_printf(buffer, "<anonymous struct>");
+ return;
case HLSL_CLASS_TEXTURE:
if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
{
- vkd3d_string_buffer_printf(string, "ByteAddressBuffer");
- return string;
+ vkd3d_string_buffer_printf(buffer, "ByteAddressBuffer");
+ return;
}
if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
{
- vkd3d_string_buffer_printf(string, "Texture");
- return string;
+ vkd3d_string_buffer_printf(buffer, "Texture");
+ return;
}
VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format));
VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types));
if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER)
{
- vkd3d_string_buffer_printf(string, "Buffer");
+ vkd3d_string_buffer_printf(buffer, "Buffer<");
}
else
{
VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions));
- vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]);
- }
- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format)))
- {
- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer);
- hlsl_release_string_buffer(ctx, inner_string);
+ vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]);
}
- return string;
+ hlsl_dump_type(buffer, type->e.resource.format);
+ vkd3d_string_buffer_printf(buffer, ">");
+ return;
case HLSL_CLASS_UAV:
if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
{
- vkd3d_string_buffer_printf(string, "RWByteAddressBuffer");
- return string;
+ vkd3d_string_buffer_printf(buffer, "RWByteAddressBuffer");
+ return;
}
if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER)
- vkd3d_string_buffer_printf(string, "RWBuffer");
+ vkd3d_string_buffer_printf(buffer, "RWBuffer<");
else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
- vkd3d_string_buffer_printf(string, "RWStructuredBuffer");
+ vkd3d_string_buffer_printf(buffer, "RWStructuredBuffer<");
else
- vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]);
- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format)))
- {
- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer);
- hlsl_release_string_buffer(ctx, inner_string);
- }
- return string;
+ vkd3d_string_buffer_printf(buffer, "RWTexture%s<", dimensions[type->sampler_dim]);
+ hlsl_dump_type(buffer, type->e.resource.format);
+ vkd3d_string_buffer_printf(buffer, ">");
+ return;
case HLSL_CLASS_CONSTANT_BUFFER:
- vkd3d_string_buffer_printf(string, "ConstantBuffer");
- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format)))
- {
- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer);
- hlsl_release_string_buffer(ctx, inner_string);
- }
- return string;
+ vkd3d_string_buffer_printf(buffer, "ConstantBuffer<");
+ hlsl_dump_type(buffer, type->e.resource.format);
+ vkd3d_string_buffer_printf(buffer, ">");
+ return;
case HLSL_CLASS_ERROR:
- vkd3d_string_buffer_printf(string, "<error type>");
- return string;
+ vkd3d_string_buffer_printf(buffer, "<error type>");
+ return;
+
+ case HLSL_CLASS_STREAM_OUTPUT:
+ if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM)
+ vkd3d_string_buffer_printf(buffer, "PointStream<");
+ else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM)
+ vkd3d_string_buffer_printf(buffer, "LineStream<");
+ else
+ vkd3d_string_buffer_printf(buffer, "TriangleStream<");
+ hlsl_dump_type(buffer, type->e.so.type);
+ vkd3d_string_buffer_printf(buffer, ">");
+ return;
case HLSL_CLASS_DEPTH_STENCIL_STATE:
case HLSL_CLASS_DEPTH_STENCIL_VIEW:
@@ -2857,8 +2940,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
break;
}
- vkd3d_string_buffer_printf(string, "<unexpected type>");
- return string;
+ vkd3d_string_buffer_printf(buffer, "<unexpected type>");
+}
+
+struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type)
+{
+ struct vkd3d_string_buffer *buffer;
+
+ if (!(buffer = hlsl_get_string_buffer(ctx)))
+ return NULL;
+ hlsl_dump_type(buffer, type);
+ return buffer;
}
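+
+/* Editorial note, not part of the upstream patch: callers keep the old
+ * allocate-and-release pattern, e.g.
+ *
+ *     if ((string = hlsl_type_to_string(ctx, type)))
+ *     {
+ *         vkd3d_string_buffer_printf(buffer, "%s ", string->buffer);
+ *         hlsl_release_string_buffer(ctx, string);
+ *     }
+ *
+ * while the recursive cases (arrays, resource formats, streams) now print
+ * straight into a single buffer via hlsl_dump_type() instead of allocating an
+ * inner string buffer per nesting level. */
+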
struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var,
@@ -2964,11 +3056,11 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
[HLSL_IR_STORE ] = "HLSL_IR_STORE",
[HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH",
[HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE",
+ [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED",
[HLSL_IR_COMPILE] = "HLSL_IR_COMPILE",
[HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE",
[HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT",
- [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF",
};
if (type >= ARRAY_SIZE(names))
@@ -3022,7 +3114,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer
vkd3d_string_buffer_printf(buffer, "%s ", string->buffer);
hlsl_release_string_buffer(ctx, string);
}
- vkd3d_string_buffer_printf(buffer, "%s %s", debug_hlsl_type(ctx, var->data_type), var->name);
+ hlsl_dump_type(buffer, var->data_type);
+ vkd3d_string_buffer_printf(buffer, " %s", var->name);
if (var->semantic.name)
vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index);
}
@@ -3103,42 +3196,36 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size)
return vkd3d_dbg_sprintf(".%s", string);
}
-static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call)
+void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx,
+ struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f)
{
- const struct hlsl_ir_function_decl *decl = call->decl;
- struct vkd3d_string_buffer *string;
size_t i;
- if (!(string = hlsl_type_to_string(ctx, decl->return_type)))
- return;
-
- vkd3d_string_buffer_printf(buffer, "call %s %s(", string->buffer, decl->func->name);
- hlsl_release_string_buffer(ctx, string);
-
- for (i = 0; i < decl->parameters.count; ++i)
+ hlsl_dump_type(buffer, f->return_type);
+ vkd3d_string_buffer_printf(buffer, " %s(", f->func->name);
+ for (i = 0; i < f->parameters.count; ++i)
{
- const struct hlsl_ir_var *param = decl->parameters.vars[i];
-
- if (!(string = hlsl_type_to_string(ctx, param->data_type)))
- return;
-
if (i)
vkd3d_string_buffer_printf(buffer, ", ");
- vkd3d_string_buffer_printf(buffer, "%s", string->buffer);
-
- hlsl_release_string_buffer(ctx, string);
+ dump_ir_var(ctx, buffer, f->parameters.vars[i]);
}
vkd3d_string_buffer_printf(buffer, ")");
}
+static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call)
+{
+ vkd3d_string_buffer_printf(buffer, "call ");
+ hlsl_dump_ir_function_decl(ctx, buffer, call->decl);
+}
+
static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant)
{
struct hlsl_type *type = constant->node.data_type;
unsigned int x;
- if (type->dimx != 1)
+ if (type->e.numeric.dimx != 1)
vkd3d_string_buffer_printf(buffer, "{");
- for (x = 0; x < type->dimx; ++x)
+ for (x = 0; x < type->e.numeric.dimx; ++x)
{
const union hlsl_constant_value_component *value = &constant->value.u[x];
@@ -3164,12 +3251,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl
case HLSL_TYPE_UINT:
vkd3d_string_buffer_printf(buffer, "%u ", value->u);
break;
-
- default:
- vkd3d_unreachable();
}
}
- if (type->dimx != 1)
+ if (type->e.numeric.dimx != 1)
vkd3d_string_buffer_printf(buffer, "}");
}
@@ -3198,16 +3282,15 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP1_F32TOF16] = "f32tof16",
[HLSL_OP1_FLOOR] = "floor",
[HLSL_OP1_FRACT] = "fract",
+ [HLSL_OP1_ISINF] = "isinf",
[HLSL_OP1_LOG2] = "log2",
[HLSL_OP1_LOGIC_NOT] = "!",
[HLSL_OP1_NEG] = "-",
- [HLSL_OP1_NRM] = "nrm",
[HLSL_OP1_RCP] = "rcp",
[HLSL_OP1_REINTERPRET] = "reinterpret",
[HLSL_OP1_ROUND] = "round",
[HLSL_OP1_RSQ] = "rsq",
[HLSL_OP1_SAT] = "sat",
- [HLSL_OP1_SIGN] = "sign",
[HLSL_OP1_SIN] = "sin",
[HLSL_OP1_SIN_REDUCED] = "sin_reduced",
[HLSL_OP1_SQRT] = "sqrt",
@@ -3217,7 +3300,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP2_BIT_AND] = "&",
[HLSL_OP2_BIT_OR] = "|",
[HLSL_OP2_BIT_XOR] = "^",
- [HLSL_OP2_CRS] = "crs",
[HLSL_OP2_DIV] = "/",
[HLSL_OP2_DOT] = "dot",
[HLSL_OP2_EQUAL] = "==",
@@ -3398,15 +3480,17 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls
unsigned int i;
dump_src(buffer, &swizzle->val);
- if (swizzle->val.node->data_type->dimy > 1)
+ if (swizzle->val.node->data_type->e.numeric.dimy > 1)
{
vkd3d_string_buffer_printf(buffer, ".");
- for (i = 0; i < swizzle->node.data_type->dimx; ++i)
- vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf);
+ for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i)
+ vkd3d_string_buffer_printf(buffer, "_m%u%u",
+ swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x);
}
else
{
- vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx));
+ vkd3d_string_buffer_printf(buffer, "%s",
+ debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->e.numeric.dimx));
}
}
@@ -3418,6 +3502,35 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_
vkd3d_string_buffer_printf(buffer, "]");
}
+static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked)
+{
+ static const char *const op_names[] =
+ {
+ [HLSL_INTERLOCKED_ADD] = "add",
+ [HLSL_INTERLOCKED_AND] = "and",
+ [HLSL_INTERLOCKED_CMP_EXCH] = "cmp_exch",
+ [HLSL_INTERLOCKED_EXCH] = "exch",
+ [HLSL_INTERLOCKED_MAX] = "max",
+ [HLSL_INTERLOCKED_MIN] = "min",
+ [HLSL_INTERLOCKED_OR] = "or",
+ [HLSL_INTERLOCKED_XOR] = "xor",
+ };
+
+ VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names));
+ vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]);
+ dump_deref(buffer, &interlocked->dst);
+ vkd3d_string_buffer_printf(buffer, ", coords = ");
+ dump_src(buffer, &interlocked->coords);
+ if (interlocked->cmp_value.node)
+ {
+ vkd3d_string_buffer_printf(buffer, ", cmp_value = ");
+ dump_src(buffer, &interlocked->cmp_value);
+ }
+ vkd3d_string_buffer_printf(buffer, ", value = ");
+ dump_src(buffer, &interlocked->value);
+ vkd3d_string_buffer_printf(buffer, ")");
+}
+
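+/* Editorial note, not part of the upstream patch: with the format strings
+ * above, an InterlockedCompareExchange on a UAV would dump along the lines of
+ * (node names illustrative)
+ *
+ *     interlocked_cmp_exch(dst = u, coords = @4, cmp_value = @5, value = @6)
+ *
+ * while cmp_value is omitted for the other interlocked ops. */
+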
static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
const struct hlsl_ir_compile *compile)
{
@@ -3551,6 +3664,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr));
break;
+ case HLSL_IR_INTERLOCKED:
+ dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr));
+ break;
+
case HLSL_IR_COMPILE:
dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr));
break;
@@ -3562,11 +3679,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
case HLSL_IR_STATEBLOCK_CONSTANT:
dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr));
break;
-
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u",
- hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx);
- break;
}
}
@@ -3625,10 +3737,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var)
void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
{
+ const struct hlsl_type *old_type = old->data_type, *new_type = new->data_type;
struct hlsl_src *src, *next;
- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx);
- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy);
+ if (hlsl_is_numeric_type(old_type))
+ {
+ VKD3D_ASSERT(hlsl_is_numeric_type(new_type));
+ VKD3D_ASSERT(old_type->e.numeric.dimx == new_type->e.numeric.dimx);
+ VKD3D_ASSERT(old_type->e.numeric.dimy == new_type->e.numeric.dimy);
+ }
LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry)
{
@@ -3719,6 +3836,7 @@ static void free_ir_load(struct hlsl_ir_load *load)
static void free_ir_loop(struct hlsl_ir_loop *loop)
{
hlsl_block_cleanup(&loop->body);
+ hlsl_block_cleanup(&loop->iter);
vkd3d_free(loop);
}
@@ -3778,6 +3896,15 @@ static void free_ir_index(struct hlsl_ir_index *index)
vkd3d_free(index);
}
+static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked)
+{
+ hlsl_cleanup_deref(&interlocked->dst);
+ hlsl_src_remove(&interlocked->coords);
+ hlsl_src_remove(&interlocked->cmp_value);
+ hlsl_src_remove(&interlocked->value);
+ vkd3d_free(interlocked);
+}
+
static void free_ir_compile(struct hlsl_ir_compile *compile)
{
unsigned int i;
@@ -3864,6 +3991,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
free_ir_switch(hlsl_ir_switch(node));
break;
+ case HLSL_IR_INTERLOCKED:
+ free_ir_interlocked(hlsl_ir_interlocked(node));
+ break;
+
case HLSL_IR_COMPILE:
free_ir_compile(hlsl_ir_compile(node));
break;
@@ -3875,10 +4006,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
case HLSL_IR_STATEBLOCK_CONSTANT:
free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node));
break;
-
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- vkd3d_free(hlsl_ir_vsir_instruction_ref(node));
- break;
}
}
@@ -3977,8 +4104,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function
uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask)
{
+ unsigned int src_component = 0;
uint32_t ret = 0;
- unsigned int i;
/* Leave replicate swizzles alone; some instructions need them. */
if (swizzle == HLSL_SWIZZLE(X, X, X, X)
@@ -3987,13 +4114,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask)
|| swizzle == HLSL_SWIZZLE(W, W, W, W))
return swizzle;
- for (i = 0; i < 4; ++i)
+ for (unsigned int dst_component = 0; dst_component < 4; ++dst_component)
{
- if (writemask & (1 << i))
- {
- ret |= (swizzle & 3) << (i * 2);
- swizzle >>= 2;
- }
+ if (writemask & (1 << dst_component))
+ hlsl_swizzle_set_component(&ret, dst_component, hlsl_swizzle_get_component(swizzle, src_component++));
}
return ret;
}
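+
+/* Editorial note, not part of the upstream patch: hlsl_map_swizzle() spreads
+ * the contiguously packed source components onto the destination slots
+ * selected by the writemask. For example, with swizzle .yz and writemask 0x5
+ * (the x and z slots),
+ *
+ *     hlsl_map_swizzle(HLSL_SWIZZLE(Y, Z, X, X), 0x5)
+ *
+ * yields a swizzle that reads Y for the x slot and Z for the z slot. */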
@@ -4046,7 +4170,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim
for (i = 0; i < dim; ++i)
{
unsigned int s = hlsl_swizzle_get_component(second, i);
- ret |= hlsl_swizzle_get_component(first, s) << HLSL_SWIZZLE_SHIFT(i);
+ hlsl_swizzle_set_component(&ret, i, hlsl_swizzle_get_component(first, s));
}
return ret;
}
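+
+/* Editorial note, not part of the upstream patch: component i of the result
+ * reads first[second[i]]; e.g. combining first = .wzyx with second = .yx over
+ * dim = 2 yields .zw, since result[0] = first[1] = z and
+ * result[1] = first[0] = w. */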
@@ -4304,7 +4428,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
}
ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID);
- ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1);
+ ctx->builtin_types.null = hlsl_new_simple_type(ctx, "NULL", HLSL_CLASS_NULL);
ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING);
ctx->builtin_types.error = hlsl_new_simple_type(ctx, "<error type>", HLSL_CLASS_ERROR);
hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index 075c76cb0e2..e9845f8f887 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -22,7 +22,6 @@
#include "vkd3d_shader_private.h"
#include "wine/rbtree.h"
-#include "d3dcommon.h"
#include "d3dx9shader.h"
/* The general IR structure is inspired by Mesa GLSL hir, even though the code
@@ -51,31 +50,17 @@
* DEALINGS IN THE SOFTWARE.
*/
-#define HLSL_SWIZZLE_X (0u)
-#define HLSL_SWIZZLE_Y (1u)
-#define HLSL_SWIZZLE_Z (2u)
-#define HLSL_SWIZZLE_W (3u)
-
-#define HLSL_SWIZZLE(x, y, z, w) \
- (((HLSL_SWIZZLE_ ## x) << 0) \
- | ((HLSL_SWIZZLE_ ## y) << 2) \
- | ((HLSL_SWIZZLE_ ## z) << 4) \
- | ((HLSL_SWIZZLE_ ## w) << 6))
-
-#define HLSL_SWIZZLE_MASK (0x3u)
-#define HLSL_SWIZZLE_SHIFT(idx) (2u * (idx))
+#define HLSL_SWIZZLE VKD3D_SHADER_SWIZZLE
static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned int idx)
{
- return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK;
+ return vsir_swizzle_get_component(swizzle, idx);
}
-static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle)
+static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component)
{
- return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0),
- hlsl_swizzle_get_component(swizzle, 1),
- hlsl_swizzle_get_component(swizzle, 2),
- hlsl_swizzle_get_component(swizzle, 3));
+ *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx));
+ *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx);
}
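+
+/* Editorial sketch, not part of the upstream patch: each component now lives
+ * in its own VKD3D_SHADER_SWIZZLE_SHIFT()/VKD3D_SHADER_SWIZZLE_MASK field from
+ * vkd3d_shader.h, so HLSL and vsir share one packing and the old
+ * vsir_swizzle_from_hlsl() conversion could be dropped. For example:
+ *
+ *     uint32_t s = HLSL_SWIZZLE(X, Y, Z, W);
+ *     hlsl_swizzle_set_component(&s, 2, 3);    (3 == W; s is now .xyww)
+ */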
enum hlsl_type_class
@@ -105,6 +90,7 @@ enum hlsl_type_class
HLSL_CLASS_GEOMETRY_SHADER,
HLSL_CLASS_CONSTANT_BUFFER,
HLSL_CLASS_BLEND_STATE,
+ HLSL_CLASS_STREAM_OUTPUT,
HLSL_CLASS_VOID,
HLSL_CLASS_NULL,
HLSL_CLASS_ERROR,
@@ -142,6 +128,13 @@ enum hlsl_sampler_dim
/* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */
};
+enum hlsl_so_object_type
+{
+ HLSL_STREAM_OUTPUT_POINT_STREAM,
+ HLSL_STREAM_OUTPUT_LINE_STREAM,
+ HLSL_STREAM_OUTPUT_TRIANGLE_STREAM,
+};
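+/* These correspond to the HLSL geometry-shader stream objects PointStream<T>,
+ * LineStream<T> and TriangleStream<T>; see hlsl_new_stream_output_type() and
+ * the KW_POINTSTREAM/KW_LINESTREAM/KW_TRIANGLESTREAM keywords in hlsl.l. */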
+
enum hlsl_regset
{
HLSL_REGSET_SAMPLERS,
@@ -176,16 +169,6 @@ struct hlsl_type
* Modifiers that don't fall inside this mask are to be stored in the variable in
* hlsl_ir_var.modifiers, or in the struct field in hlsl_ir_field.modifiers. */
uint32_t modifiers;
- /* Size of the type values on each dimension. For non-numeric types, they are set for the
- * convenience of the sm1/sm4 backends.
- * If type is HLSL_CLASS_SCALAR, then both dimx = 1 and dimy = 1.
- * If type is HLSL_CLASS_VECTOR, then dimx is the size of the vector, and dimy = 1.
- * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows.
- * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements.
- * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1.
- */
- unsigned int dimx;
- unsigned int dimy;
/* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */
unsigned int sample_count;
@@ -195,6 +178,10 @@ struct hlsl_type
struct
{
enum hlsl_base_type type;
+ /* For scalars, dimx == dimy == 1.
+ * For vectors, dimx == vector width; dimy == 1.
+ * For matrices, dimx == column count; dimy == row count. */
+ unsigned int dimx, dimy;
} numeric;
/* Additional information if type is HLSL_CLASS_STRUCT. */
struct
@@ -220,6 +207,12 @@ struct hlsl_type
} resource;
/* Additional field to distinguish object types. Currently used only for technique types. */
unsigned int version;
+ /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */
+ struct
+ {
+ struct hlsl_type *type;
+ enum hlsl_so_object_type so_type;
+ } so;
} e;
/* Number of numeric register components used by one value of this type, for each regset.
@@ -326,12 +319,11 @@ enum hlsl_ir_node_type
HLSL_IR_STORE,
HLSL_IR_SWIZZLE,
HLSL_IR_SWITCH,
+ HLSL_IR_INTERLOCKED,
HLSL_IR_COMPILE,
HLSL_IR_SAMPLER_STATE,
HLSL_IR_STATEBLOCK_CONSTANT,
-
- HLSL_IR_VSIR_INSTRUCTION_REF,
};
/* Common data for every type of IR instruction node. */
@@ -524,6 +516,10 @@ struct hlsl_ir_var
* element of a struct, and thus needs to be aligned when packed in the signature. */
bool force_align;
+ /* Whether this is a sampler that was created from the combination of a
+ * sampler and a texture for SM<4 backwards compatibility. */
+ bool is_combined_sampler;
+
uint32_t is_input_semantic : 1;
uint32_t is_output_semantic : 1;
uint32_t is_uniform : 1;
@@ -644,21 +640,30 @@ struct hlsl_ir_if
struct hlsl_block else_block;
};
-enum hlsl_ir_loop_unroll_type
+enum hlsl_loop_unroll_type
{
- HLSL_IR_LOOP_UNROLL,
- HLSL_IR_LOOP_FORCE_UNROLL,
- HLSL_IR_LOOP_FORCE_LOOP
+ HLSL_LOOP_UNROLL,
+ HLSL_LOOP_FORCE_UNROLL,
+ HLSL_LOOP_FORCE_LOOP
+};
+
+enum hlsl_loop_type
+{
+ HLSL_LOOP_FOR,
+ HLSL_LOOP_WHILE,
+ HLSL_LOOP_DO_WHILE
};
struct hlsl_ir_loop
{
struct hlsl_ir_node node;
+ struct hlsl_block iter;
/* loop condition is stored in the body (as "if (!condition) break;") */
struct hlsl_block body;
+ enum hlsl_loop_type type;
unsigned int next_index; /* liveness index of the end of the loop */
unsigned int unroll_limit;
- enum hlsl_ir_loop_unroll_type unroll_type;
+ enum hlsl_loop_unroll_type unroll_type;
};
struct hlsl_ir_switch_case
@@ -700,16 +705,15 @@ enum hlsl_ir_expr_op
HLSL_OP1_F32TOF16,
HLSL_OP1_FLOOR,
HLSL_OP1_FRACT,
+ HLSL_OP1_ISINF,
HLSL_OP1_LOG2,
HLSL_OP1_LOGIC_NOT,
HLSL_OP1_NEG,
- HLSL_OP1_NRM,
HLSL_OP1_RCP,
HLSL_OP1_REINTERPRET,
HLSL_OP1_ROUND,
HLSL_OP1_RSQ,
HLSL_OP1_SAT,
- HLSL_OP1_SIGN,
HLSL_OP1_SIN,
HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */
HLSL_OP1_SQRT,
@@ -719,7 +723,6 @@ enum hlsl_ir_expr_op
HLSL_OP2_BIT_AND,
HLSL_OP2_BIT_OR,
HLSL_OP2_BIT_XOR,
- HLSL_OP2_CRS,
HLSL_OP2_DIV,
HLSL_OP2_DOT,
HLSL_OP2_EQUAL,
@@ -781,7 +784,17 @@ struct hlsl_ir_swizzle
{
struct hlsl_ir_node node;
struct hlsl_src val;
- uint32_t swizzle;
+ union
+ {
+ uint32_t vector;
+ struct hlsl_matrix_swizzle
+ {
+ struct
+ {
+ uint8_t x, y;
+ } components[4];
+ } matrix;
+ } u;
};
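+
+/* Editorial note, not part of the upstream patch: for a matrix swizzle such
+ * as "val._m21", the selected element is stored as a (column, row) pair,
+ * i.e. components[0] = { .x = 1, .y = 2 }; dump_ir_swizzle() prints it back
+ * as "_m%u%u" with the row digit first. */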
struct hlsl_ir_index
@@ -844,6 +857,10 @@ enum hlsl_resource_load_type
HLSL_RESOURCE_GATHER_GREEN,
HLSL_RESOURCE_GATHER_BLUE,
HLSL_RESOURCE_GATHER_ALPHA,
+ HLSL_RESOURCE_GATHER_CMP_RED,
+ HLSL_RESOURCE_GATHER_CMP_GREEN,
+ HLSL_RESOURCE_GATHER_CMP_BLUE,
+ HLSL_RESOURCE_GATHER_CMP_ALPHA,
HLSL_RESOURCE_SAMPLE_INFO,
HLSL_RESOURCE_RESINFO,
};
@@ -934,14 +951,30 @@ struct hlsl_ir_stateblock_constant
char *name;
};
-/* A vkd3d_shader_instruction that can be inserted in a hlsl_block.
- * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */
-struct hlsl_ir_vsir_instruction_ref
+enum hlsl_interlocked_op
{
- struct hlsl_ir_node node;
+ HLSL_INTERLOCKED_ADD,
+ HLSL_INTERLOCKED_AND,
+ HLSL_INTERLOCKED_CMP_EXCH,
+ HLSL_INTERLOCKED_EXCH,
+ HLSL_INTERLOCKED_MAX,
+ HLSL_INTERLOCKED_MIN,
+ HLSL_INTERLOCKED_OR,
+ HLSL_INTERLOCKED_XOR,
+};
- /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */
- unsigned int vsir_instr_idx;
+/* Represents an interlocked operation.
+ *
+ * The data_type of the node indicates whether or not the original value is returned.
+ * If the original value is not returned, the data_type is set to NULL.
+ * Otherwise, the data_type is set to the type of the original value.
+ */
+struct hlsl_ir_interlocked
+{
+ struct hlsl_ir_node node;
+ enum hlsl_interlocked_op op;
+ struct hlsl_deref dst;
+ struct hlsl_src coords, cmp_value, value;
};
struct hlsl_scope
@@ -1241,6 +1274,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n
return CONTAINING_RECORD(node, struct hlsl_ir_switch, node);
}
+static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node)
+{
+ VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED);
+ return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node);
+}
+
static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node)
{
VKD3D_ASSERT(node->type == HLSL_IR_COMPILE);
@@ -1259,12 +1298,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co
return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node);
}
-static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node)
-{
- VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF);
- return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node);
-}
-
static inline void hlsl_block_init(struct hlsl_block *block)
{
list_init(&block->instrs);
@@ -1442,6 +1475,8 @@ void hlsl_block_cleanup(struct hlsl_block *block);
bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);
+void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx,
+ struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f);
void hlsl_dump_var_default_values(const struct hlsl_ir_var *var);
bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block,
@@ -1519,6 +1554,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx,
+ enum hlsl_so_object_type so_type, struct hlsl_type *type);
struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
@@ -1550,8 +1587,14 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty
struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val,
struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type,
+ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value,
+ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter,
+ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type,
+ unsigned int unroll_limit, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s,
+ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx,
const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource,
@@ -1588,9 +1631,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned
struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector,
struct list *cases, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx,
- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc);
-
void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
@@ -1645,24 +1685,35 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere
bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block);
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
+bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
struct hlsl_block *block, void *context);
D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type);
-D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type);
+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler);
+
+struct extern_resource
+{
+ /* "var" is only not NULL if this resource is a whole variable, so it may
+ * be responsible for more than one component. */
+ const struct hlsl_ir_var *var;
+ const struct hlsl_buffer *buffer;
-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer);
-int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
- const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab,
- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
+ char *name;
+ bool is_user_packed;
+
+ /* The data type of a single component of the resource. This might be
+ * different from the data type of the resource itself in 4.0 profiles,
+ * where an array (or multi-dimensional array) is handled as a single
+ * resource, unlike in 5.0. */
+ struct hlsl_type *component_type;
-int tpf_compile(struct vsir_program *program, uint64_t config_flags,
- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context,
- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
+ enum hlsl_regset regset;
+ unsigned int id, space, index, bind_count;
-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type,
- unsigned int storage_modifiers);
+ struct vkd3d_shader_location loc;
+};
struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
index 8dace11916a..31fb30521e9 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
@@ -104,6 +104,7 @@ if {return KW_IF; }
in {return KW_IN; }
inline {return KW_INLINE; }
inout {return KW_INOUT; }
+LineStream {return KW_LINESTREAM; }
linear {return KW_LINEAR; }
matrix {return KW_MATRIX; }
namespace {return KW_NAMESPACE; }
@@ -114,6 +115,7 @@ out {return KW_OUT; }
packoffset {return KW_PACKOFFSET; }
pass {return KW_PASS; }
PixelShader {return KW_PIXELSHADER; }
+PointStream {return KW_POINTSTREAM; }
pixelshader {return KW_PIXELSHADER; }
RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; }
RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; }
@@ -170,6 +172,7 @@ texture3D {return KW_TEXTURE3D; }
TextureCube {return KW_TEXTURECUBE; }
textureCUBE {return KW_TEXTURECUBE; }
TextureCubeArray {return KW_TEXTURECUBEARRAY; }
+TriangleStream {return KW_TRIANGLESTREAM; }
true {return KW_TRUE; }
typedef {return KW_TYPEDEF; }
unsigned {return KW_UNSIGNED; }
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index 60aade732db..da2f482b148 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -247,18 +247,19 @@ static bool type_contains_only_numerics(const struct hlsl_type *type)
static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst)
{
- if (hlsl_is_numeric_type(src) && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst))
+ if (hlsl_is_numeric_type(src) && src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1
+ && type_contains_only_numerics(dst))
return true;
if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX
- && src->dimx >= dst->dimx && src->dimy >= dst->dimy)
+ && src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy)
return true;
- if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1)
+ if ((src->class == HLSL_CLASS_MATRIX && src->e.numeric.dimx > 1 && src->e.numeric.dimy > 1)
&& hlsl_type_component_count(src) != hlsl_type_component_count(dst))
return false;
- if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1)
+ if ((dst->class == HLSL_CLASS_MATRIX && dst->e.numeric.dimy > 1)
&& hlsl_type_component_count(src) != hlsl_type_component_count(dst))
return false;
@@ -273,16 +274,16 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ
if (hlsl_is_numeric_type(src))
{
/* Scalar vars can be converted to any other numeric data type */
- if (src->dimx == 1 && src->dimy == 1)
+ if (src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1)
return true;
/* The other way around is true too */
- if (dst->dimx == 1 && dst->dimy == 1)
+ if (dst->e.numeric.dimx == 1 && dst->e.numeric.dimy == 1)
return true;
if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX)
{
if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX)
- return src->dimx >= dst->dimx && src->dimy >= dst->dimy;
+ return src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy;
/* Matrix-vector conversion is apparently allowed if they have
* the same components count, or if the matrix is 1xN or Nx1
@@ -292,8 +293,8 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ
if (hlsl_type_component_count(src) == hlsl_type_component_count(dst))
return true;
- if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) &&
- (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1))
+ if ((src->class == HLSL_CLASS_VECTOR || src->e.numeric.dimx == 1 || src->e.numeric.dimy == 1)
+ && (dst->class == HLSL_CLASS_VECTOR || dst->e.numeric.dimx == 1 || dst->e.numeric.dimy == 1))
return hlsl_type_component_count(src) >= hlsl_type_component_count(dst);
}
@@ -301,7 +302,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ
}
else
{
- return src->dimx >= dst->dimx;
+ return src->e.numeric.dimx >= dst->e.numeric.dimx;
}
}
@@ -335,7 +336,7 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node
if (type->class == HLSL_CLASS_ERROR)
return;
- if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1)
+ if (type->class > HLSL_CLASS_LAST_NUMERIC || type->e.numeric.dimx > 1 || type->e.numeric.dimy > 1)
{
struct vkd3d_string_buffer *string;
@@ -368,14 +369,14 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl
struct hlsl_ir_var *var;
unsigned int dst_idx;
- broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1;
+ broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1;
matrix_cast = !broadcast && dst_comp_count != src_comp_count
&& src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX;
VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast);
if (matrix_cast)
{
- VKD3D_ASSERT(dst_type->dimx <= src_type->dimx);
- VKD3D_ASSERT(dst_type->dimy <= src_type->dimy);
+ VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx);
+ VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy);
}
if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc)))
@@ -395,9 +396,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl
}
else if (matrix_cast)
{
- unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx;
+ unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx;
- src_idx = y * src_type->dimx + x;
+ src_idx = y * src_type->e.numeric.dimx + x;
}
else
{
@@ -458,7 +459,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct
return NULL;
}
- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation)
+ if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type)
+ && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy
+ && ctx->warn_implicit_truncation)
hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.",
src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix");
@@ -475,7 +478,11 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo
for (i = 0; i < arrays->count; ++i)
{
if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT)
+ {
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts.");
+ dst_type = ctx->builtin_types.error;
+ break;
+ }
dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]);
}
@@ -551,13 +558,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co
return true;
}
-enum loop_type
-{
- LOOP_FOR,
- LOOP_WHILE,
- LOOP_DO_WHILE
-};
-
static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs)
{
unsigned int i, j;
@@ -573,8 +573,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru
}
}
-static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type,
- struct hlsl_block *cond, struct hlsl_block *iter)
+static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ enum hlsl_loop_type type, struct hlsl_block *cond)
{
struct hlsl_ir_node *instr, *next;
@@ -584,8 +584,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block
{
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- resolve_loop_continue(ctx, &iff->then_block, type, cond, iter);
- resolve_loop_continue(ctx, &iff->else_block, type, cond, iter);
+ resolve_loop_continue(ctx, &iff->then_block, type, cond);
+ resolve_loop_continue(ctx, &iff->else_block, type, cond);
}
else if (instr->type == HLSL_IR_JUMP)
{
@@ -595,7 +595,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block
if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
continue;
- if (type == LOOP_DO_WHILE)
+ if (type == HLSL_LOOP_DO_WHILE)
{
if (!hlsl_clone_block(ctx, &cond_block, cond))
return;
@@ -606,13 +606,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block
}
list_move_before(&instr->entry, &cond_block.instrs);
}
- else if (type == LOOP_FOR)
- {
- if (!hlsl_clone_block(ctx, &cond_block, iter))
- return;
- list_move_before(&instr->entry, &cond_block.instrs);
- }
- jump->type = HLSL_IR_JUMP_CONTINUE;
}
}
}
@@ -674,12 +667,11 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
case HLSL_IR_RESOURCE_LOAD:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
+ case HLSL_IR_INTERLOCKED:
case HLSL_IR_STATEBLOCK_CONSTANT:
hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
"Expected literal expression.");
break;
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- vkd3d_unreachable();
}
}
@@ -738,11 +730,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str
return res.number.u;
}
-static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
+static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type,
const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond,
struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc)
{
- enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL;
+ enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL;
unsigned int i, unroll_limit = 0;
struct hlsl_ir_node *loop;
@@ -773,11 +765,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
hlsl_block_cleanup(&expr);
}
- unroll_type = HLSL_IR_LOOP_FORCE_UNROLL;
+ unroll_type = HLSL_LOOP_FORCE_UNROLL;
}
else if (!strcmp(attr->name, "loop"))
{
- unroll_type = HLSL_IR_LOOP_FORCE_LOOP;
+ unroll_type = HLSL_LOOP_FORCE_LOOP;
}
else if (!strcmp(attr->name, "fastopt")
|| !strcmp(attr->name, "allow_uav_condition"))
@@ -790,7 +782,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
}
}
- resolve_loop_continue(ctx, body, type, cond, iter);
+ resolve_loop_continue(ctx, body, type, cond);
if (!init && !(init = make_empty_block(ctx)))
goto oom;
@@ -798,15 +790,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
if (!append_conditional_break(ctx, cond))
goto oom;
- if (iter)
- hlsl_block_add_block(body, iter);
-
- if (type == LOOP_DO_WHILE)
+ if (type == HLSL_LOOP_DO_WHILE)
list_move_tail(&body->instrs, &cond->instrs);
else
list_move_head(&body->instrs, &cond->instrs);
- if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc)))
+ if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc)))
goto oom;
hlsl_block_add_instr(init, loop);
@@ -860,6 +849,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod
if (value->data_type->class == HLSL_CLASS_MATRIX)
{
/* Matrix swizzle */
+ struct hlsl_matrix_swizzle s;
bool m_swizzle;
unsigned int inc, x, y;
@@ -888,12 +878,13 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod
x = swizzle[i + 2] - '1';
}
- if (x >= value->data_type->dimx || y >= value->data_type->dimy)
+ if (x >= value->data_type->e.numeric.dimx || y >= value->data_type->e.numeric.dimy)
return NULL;
- swiz |= (y << 4 | x) << component * 8;
+ s.components[component].x = x;
+ s.components[component].y = y;
component++;
}
- return hlsl_new_swizzle(ctx, swiz, component, value, loc);
+ return hlsl_new_matrix_swizzle(ctx, s, component, value, loc);
}
/* Vector swizzle */
@@ -920,10 +911,9 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod
break;
}
- if (s >= value->data_type->dimx)
+ if (s >= value->data_type->e.numeric.dimx)
return NULL;
- swiz |= s << component * 2;
- component++;
+ hlsl_swizzle_set_component(&swiz, component++, s);
}
if (valid)
return hlsl_new_swizzle(ctx, swiz, component, value, loc);
@@ -1035,7 +1025,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str
{
unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim);
- if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count)
+ if (index_type->class > HLSL_CLASS_VECTOR || index_type->e.numeric.dimx != dim_count)
{
struct vkd3d_string_buffer *string;
@@ -1192,6 +1182,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields,
{
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Implicit size arrays not allowed in struct fields.");
+ field->type = ctx->builtin_types.error;
+ break;
}
field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]);
@@ -1282,6 +1274,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type,
{
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Implicit size arrays not allowed in typedefs.");
+ if (!(type = hlsl_type_clone(ctx, ctx->builtin_types.error, 0, 0)))
+ {
+ free_parse_variable_def(v);
+ ret = false;
+ }
+ break;
}
if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i])))
@@ -1325,6 +1323,11 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
"Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name);
+ if ((param->modifiers & HLSL_STORAGE_OUT) && !(param->modifiers & HLSL_STORAGE_IN)
+ && (param->type->modifiers & HLSL_MODIFIER_CONST))
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
+ "Parameter '%s' is declared as both \"out\" and \"const\".", param->name);
+
if (param->reg_reservation.offset_type)
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
"packoffset() is not allowed on function parameters.");
@@ -1580,7 +1583,7 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *
static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2)
{
/* Scalar vars can be converted to pretty much everything */
- if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1))
+ if ((t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) || (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1))
return true;
if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR)
@@ -1595,13 +1598,13 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t
if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2))
return true;
- return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1))
- || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1));
+ return (t1->class == HLSL_CLASS_MATRIX && (t1->e.numeric.dimx == 1 || t1->e.numeric.dimy == 1))
+ || (t2->class == HLSL_CLASS_MATRIX && (t2->e.numeric.dimx == 1 || t2->e.numeric.dimy == 1));
}
/* Both matrices */
- if ((t1->dimx >= t2->dimx && t1->dimy >= t2->dimy)
- || (t1->dimx <= t2->dimx && t1->dimy <= t2->dimy))
+ if ((t1->e.numeric.dimx >= t2->e.numeric.dimx && t1->e.numeric.dimy >= t2->e.numeric.dimy)
+ || (t1->e.numeric.dimx <= t2->e.numeric.dimx && t1->e.numeric.dimy <= t2->e.numeric.dimy))
return true;
}
@@ -1661,37 +1664,37 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct
return false;
}
- if (t1->dimx == 1 && t1->dimy == 1)
+ if (t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1)
{
*type = t2->class;
- *dimx = t2->dimx;
- *dimy = t2->dimy;
+ *dimx = t2->e.numeric.dimx;
+ *dimy = t2->e.numeric.dimy;
}
- else if (t2->dimx == 1 && t2->dimy == 1)
+ else if (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1)
{
*type = t1->class;
- *dimx = t1->dimx;
- *dimy = t1->dimy;
+ *dimx = t1->e.numeric.dimx;
+ *dimy = t1->e.numeric.dimy;
}
else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX)
{
*type = HLSL_CLASS_MATRIX;
- *dimx = min(t1->dimx, t2->dimx);
- *dimy = min(t1->dimy, t2->dimy);
+ *dimx = min(t1->e.numeric.dimx, t2->e.numeric.dimx);
+ *dimy = min(t1->e.numeric.dimy, t2->e.numeric.dimy);
}
else
{
- if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy)
+ if (t1->e.numeric.dimx * t1->e.numeric.dimy <= t2->e.numeric.dimx * t2->e.numeric.dimy)
{
*type = t1->class;
- *dimx = t1->dimx;
- *dimy = t1->dimy;
+ *dimx = t1->e.numeric.dimx;
+ *dimy = t1->e.numeric.dimy;
}
else
{
*type = t2->class;
- *dimx = t2->dimx;
- *dimy = t2->dimy;
+ *dimx = t2->e.numeric.dimx;
+ *dimy = t2->e.numeric.dimy;
}
}
@@ -1719,7 +1722,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl
return NULL;
hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < type->dimy * type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimy * type->e.numeric.dimx; ++i)
{
struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL };
struct hlsl_block store_block;
@@ -1822,7 +1825,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct
return arg;
bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL,
- arg->data_type->dimx, arg->data_type->dimy);
+ arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy);
if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc)))
return NULL;
@@ -1985,11 +1988,11 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls
}
if (arg1->data_type->class == HLSL_CLASS_SCALAR)
- dim = arg2->data_type->dimx;
+ dim = arg2->data_type->e.numeric.dimx;
else if (arg2->data_type->class == HLSL_CLASS_SCALAR)
- dim = arg1->data_type->dimx;
+ dim = arg1->data_type->e.numeric.dimx;
else
- dim = min(arg1->data_type->dimx, arg2->data_type->dimx);
+ dim = min(arg1->data_type->e.numeric.dimx, arg2->data_type->e.numeric.dimx);
if (dim == 1)
op = HLSL_OP2_MUL;
@@ -2092,8 +2095,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
{
if (*writemask & (1 << i))
{
- unsigned int s = (*swizzle >> (i * 2)) & 3;
- new_swizzle |= s << (bit++ * 2);
+ unsigned int s = hlsl_swizzle_get_component(*swizzle, i);
+ hlsl_swizzle_set_component(&new_swizzle, bit++, s);
if (new_writemask & (1 << s))
return false;
new_writemask |= 1 << s;
@@ -2107,9 +2110,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
{
for (j = 0; j < width; ++j)
{
- unsigned int s = (new_swizzle >> (j * 2)) & 3;
+ unsigned int s = hlsl_swizzle_get_component(new_swizzle, j);
if (s == i)
- inverted |= j << (bit++ * 2);
+ hlsl_swizzle_set_component(&inverted, bit++, j);
}
}
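+
+    /* E.g., "var.zy = rhs" arrives as the swizzle (z, y); the result is the
+     * writemask .yz with the rhs swizzled by (y, x), i.e. var.yz = rhs.yx. */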
@@ -2119,22 +2122,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
return true;
}
-static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width)
+static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle,
+ uint32_t *ret_inverted, unsigned int *writemask, unsigned int *ret_width)
{
- /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y.
- * components are indexed by their sources. i.e. the first component comes from the first
- * component of the rhs. */
- unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0;
+ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0;
+ struct hlsl_matrix_swizzle new_swizzle = {0};
/* First, we filter the swizzle to remove components that aren't enabled by writemask. */
for (i = 0; i < 4; ++i)
{
if (*writemask & (1 << i))
{
- unsigned int s = (*swizzle >> (i * 8)) & 0xff;
- unsigned int x = s & 0xf, y = (s >> 4) & 0xf;
+ unsigned int x = swizzle->components[i].x;
+ unsigned int y = swizzle->components[i].y;
unsigned int idx = x + y * 4;
- new_swizzle |= s << (bit++ * 8);
+
+ new_swizzle.components[bit++] = swizzle->components[i];
if (new_writemask & (1 << idx))
return false;
new_writemask |= 1 << idx;
@@ -2142,22 +2145,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un
}
width = bit;
- /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the
- * incoming vector. */
+ /* Then we invert the swizzle. The resulting swizzle uses a uint32_t
+ * vector format, because it's for the incoming vector. */
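+    /* E.g., a one-component matrix swizzle with x = 1, y = 2 occupies matrix
+     * element 1 + 2 * 4 = 9; the loops below emit j = 0 when i reaches 9, so
+     * the inverted swizzle routes rhs component 0 to that element. */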
bit = 0;
for (i = 0; i < 16; ++i)
{
for (j = 0; j < width; ++j)
{
- unsigned int s = (new_swizzle >> (j * 8)) & 0xff;
- unsigned int x = s & 0xf, y = (s >> 4) & 0xf;
+ unsigned int x = new_swizzle.components[j].x;
+ unsigned int y = new_swizzle.components[j].y;
unsigned int idx = x + y * 4;
if (idx == i)
- inverted |= j << (bit++ * 2);
+ hlsl_swizzle_set_component(&inverted, bit++, j);
}
}
- *swizzle = inverted;
+ *ret_inverted = inverted;
*writemask = new_writemask;
*ret_width = width;
return true;
@@ -2193,8 +2196,8 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
if (hlsl_is_numeric_type(lhs_type))
{
- writemask = (1 << lhs_type->dimx) - 1;
- width = lhs_type->dimx;
+ writemask = (1 << lhs_type->e.numeric.dimx) - 1;
+ width = lhs_type->e.numeric.dimx;
}
if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc)))
@@ -2211,28 +2214,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
{
struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs);
struct hlsl_ir_node *new_swizzle;
- uint32_t s = swizzle->swizzle;
+ uint32_t s;
VKD3D_ASSERT(!matrix_writemask);
if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX)
{
+ struct hlsl_matrix_swizzle ms = swizzle->u.matrix;
+
if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX)
{
hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle.");
return false;
}
- if (!invert_swizzle_matrix(&s, &writemask, &width))
+ if (!invert_swizzle_matrix(&ms, &s, &writemask, &width))
{
hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix.");
return false;
}
matrix_writemask = true;
}
- else if (!invert_swizzle(&s, &writemask, &width))
+ else
{
- hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask.");
- return false;
+ s = swizzle->u.vector;
+ if (!invert_swizzle(&s, &writemask, &width))
+ {
+ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask.");
+ return false;
+ }
}
if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc)))
@@ -2275,13 +2284,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim);
- if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy)
+ if (width != resource_type->e.resource.format->e.numeric.dimx * resource_type->e.resource.format->e.numeric.dimy)
hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK,
"Resource store expressions must write to all components.");
VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR);
VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT);
- VKD3D_ASSERT(coords->data_type->dimx == dim_count);
+ VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count);
if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc)))
{
@@ -2298,14 +2307,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
hlsl_init_deref_from_index_chain(ctx, &deref, lhs);
- for (i = 0; i < lhs->data_type->dimy; ++i)
+ for (i = 0; i < lhs->data_type->e.numeric.dimy; ++i)
{
- for (j = 0; j < lhs->data_type->dimx; ++j)
+ for (j = 0; j < lhs->data_type->e.numeric.dimx; ++j)
{
struct hlsl_ir_node *load;
struct hlsl_block store_block;
const unsigned int idx = i * 4 + j;
- const unsigned int component = i * lhs->data_type->dimx + j;
+ const unsigned int component = i * lhs->data_type->e.numeric.dimx + j;
if (!(writemask & (1 << idx)))
continue;
@@ -2335,7 +2344,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
VKD3D_ASSERT(!matrix_writemask);
- for (i = 0; i < mat->data_type->dimx; ++i)
+ for (i = 0; i < mat->data_type->e.numeric.dimx; ++i)
{
struct hlsl_ir_node *cell, *load, *store, *c;
struct hlsl_deref deref;
@@ -2670,26 +2679,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v)
{
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Only innermost array size can be implicit.");
- v->initializer.args_count = 0;
+ type = ctx->builtin_types.error;
+ break;
}
else if (elem_components == 0)
{
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Cannot declare an implicit size array of a size 0 type.");
- v->initializer.args_count = 0;
+ type = ctx->builtin_types.error;
+ break;
}
else if (size == 0)
{
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Implicit size arrays need to be initialized.");
- v->initializer.args_count = 0;
+ type = ctx->builtin_types.error;
+ break;
}
else if (size % elem_components != 0)
{
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
"Cannot initialize implicit size array with %u components, expected a multiple of %u.",
size, elem_components);
- v->initializer.args_count = 0;
+ type = ctx->builtin_types.error;
+ break;
}
else
{
@@ -2908,7 +2921,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var
v->initializer.args[0] = node_from_block(v->initializer.instrs);
}
- initialize_var(ctx, var, &v->initializer, is_default_values_initializer);
+ if (var->data_type->class != HLSL_CLASS_ERROR)
+ initialize_var(ctx, var, &v->initializer, is_default_values_initializer);
if (is_default_values_initializer)
{
@@ -2993,13 +3007,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_
return true;
}
+static enum hlsl_base_type hlsl_base_type_class(enum hlsl_base_type t)
+{
+ switch (t)
+ {
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_DOUBLE:
+ return HLSL_TYPE_FLOAT;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ return HLSL_TYPE_INT;
+
+ case HLSL_TYPE_BOOL:
+ return HLSL_TYPE_BOOL;
+ }
+
+ return 0;
+}
+
+static unsigned int hlsl_base_type_width(enum hlsl_base_type t)
+{
+ switch (t)
+ {
+ case HLSL_TYPE_HALF:
+ return 16;
+
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ case HLSL_TYPE_BOOL:
+ return 32;
+
+ case HLSL_TYPE_DOUBLE:
+ return 64;
+ }
+
+ return 0;
+}
+
+static int function_parameter_compare(const struct hlsl_ir_var *candidate,
+ const struct hlsl_ir_var *ref, const struct hlsl_ir_node *arg)
+{
+ struct
+ {
+ enum hlsl_base_type type;
+ enum hlsl_base_type class;
+ unsigned int count, width;
+ } c, r, a;
+ int ret;
+
+ /* TODO: Non-numeric types. */
+ if (!hlsl_is_numeric_type(arg->data_type))
+ return 0;
+
+ c.type = candidate->data_type->e.numeric.type;
+ c.class = hlsl_base_type_class(c.type);
+ c.count = hlsl_type_component_count(candidate->data_type);
+ c.width = hlsl_base_type_width(c.type);
+
+ r.type = ref->data_type->e.numeric.type;
+ r.class = hlsl_base_type_class(r.type);
+ r.count = hlsl_type_component_count(ref->data_type);
+ r.width = hlsl_base_type_width(r.type);
+
+ a.type = arg->data_type->e.numeric.type;
+ a.class = hlsl_base_type_class(a.type);
+ a.count = hlsl_type_component_count(arg->data_type);
+ a.width = hlsl_base_type_width(a.type);
+
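+    /* Each test below yields -1 (candidate worse), 0 (tie) or 1 (candidate
+     * better); the first non-zero result decides. E.g., for a float4
+     * argument with candidate half4 and reference float2, a.count = 4,
+     * c.count = 4 and r.count = 2, so the first test below gives 1 - 0 = 1
+     * and half4 wins outright. */
+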
+    /* Prefer candidates without component count narrowing. E.g., given a
+     * float4 argument, half4 is a better match than float2. */
+ if ((ret = (a.count > r.count) - (a.count > c.count)))
+ return ret;
+
+ /* Prefer candidates with matching component type classes. E.g., given a
+ * float argument, double is a better match than int. */
+ if ((ret = (a.class == c.class) - (a.class == r.class)))
+ return ret;
+
+ /* Prefer candidates with matching component types. E.g., given an int
+ * argument, int4 is a better match than uint4. */
+ if ((ret = (a.type == c.type) - (a.type == r.type)))
+ return ret;
+
+ /* Prefer candidates without component type narrowing. E.g., given a float
+ * argument, double is a better match than half. */
+ if ((ret = (a.width > r.width) - (a.width > c.width)))
+ return ret;
+
+    /* Prefer candidates without component count widening. E.g., given a float
+ * argument, float is a better match than float2. */
+ return (a.count < r.count) - (a.count < c.count);
+}
+
+static int function_compare(const struct hlsl_ir_function_decl *candidate,
+ const struct hlsl_ir_function_decl *ref, const struct parse_initializer *args)
+{
+ bool any_worse = false, any_better = false;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < args->args_count; ++i)
+ {
+ ret = function_parameter_compare(candidate->parameters.vars[i], ref->parameters.vars[i], args->args[i]);
+ if (ret < 0)
+ any_worse = true;
+ else if (ret > 0)
+ any_better = true;
+ }
+
+ /* We consider a candidate better if at least one parameter is a better
+ * match, and none are a worse match. */
+ return any_better - any_worse;
+}
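+
+/* E.g., given arguments (int, float), function_compare() returns 0 for the
+ * overloads f(int, int) and f(float, float): each improves on the other for
+ * exactly one parameter, so both survive and the call is reported as
+ * ambiguous below. */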
+
static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx,
const char *name, const struct parse_initializer *args, bool is_compile,
const struct vkd3d_shader_location *loc)
{
- struct hlsl_ir_function_decl *decl, *compatible_match = NULL;
+ struct hlsl_ir_function_decl *decl;
+ struct vkd3d_string_buffer *s;
struct hlsl_ir_function *func;
struct rb_entry *entry;
+ int compare;
+ size_t i;
+ struct
+ {
+ struct hlsl_ir_function_decl **candidates;
+ size_t count, capacity;
+ } candidates = {0};
if (!(entry = rb_get(&ctx->functions, name)))
return NULL;
@@ -3007,18 +3145,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx,
LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry)
{
- if (func_is_compatible_match(ctx, decl, is_compile, args))
+ if (!func_is_compatible_match(ctx, decl, is_compile, args))
+ continue;
+
+ if (candidates.count)
{
- if (compatible_match)
+ compare = function_compare(decl, candidates.candidates[0], args);
+
+ /* The candidate is worse; skip it. */
+ if (compare < 0)
+ continue;
+
+ /* The candidate is better; replace the current candidates. */
+ if (compare > 0)
{
- hlsl_fixme(ctx, loc, "Prioritize between multiple compatible function overloads.");
- break;
+ candidates.candidates[0] = decl;
+ candidates.count = 1;
+ continue;
+ }
+ }
+
+ if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates,
+ &candidates.capacity, candidates.count + 1, sizeof(decl))))
+ {
+ vkd3d_free(candidates.candidates);
+ return NULL;
+ }
+ candidates.candidates[candidates.count++] = decl;
+ }
+
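+    /* Every entry left in the list compared as 0 against the first one, so a
+     * single survivor is the unique best match, while two or more make the
+     * call ambiguous. */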
+ if (!candidates.count)
+ return NULL;
+
+ if (candidates.count > 1)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL, "Ambiguous function call.");
+ if ((s = hlsl_get_string_buffer(ctx)))
+ {
+ hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, "Candidates are:");
+ for (i = 0; i < candidates.count; ++i)
+ {
+ hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]);
+ hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer);
+ vkd3d_string_buffer_clear(s);
}
- compatible_match = decl;
+ hlsl_release_string_buffer(ctx, s);
}
}
- return compatible_match;
+ decl = candidates.candidates[0];
+ vkd3d_free(candidates.candidates);
+
+ return decl;
}
static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc)
@@ -3164,7 +3342,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx,
if (!type_is_integer(type->e.numeric.type))
return arg;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy);
return add_implicit_conversion(ctx, params->instrs, arg, type, loc);
}
@@ -3203,13 +3381,13 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *
if (arg_type->class == HLSL_CLASS_VECTOR)
{
vectors = true;
- dimx = min(dimx, arg_type->dimx);
+ dimx = min(dimx, arg_type->e.numeric.dimx);
}
else if (arg_type->class == HLSL_CLASS_MATRIX)
{
matrices = true;
- dimx = min(dimx, arg_type->dimx);
- dimy = min(dimy, arg_type->dimy);
+ dimx = min(dimx, arg_type->e.numeric.dimx);
+ dimy = min(dimy, arg_type->e.numeric.dimy);
}
}
@@ -3254,7 +3432,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
return false;
if (type_is_integer(type->e.numeric.type))
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy);
return convert_args(ctx, params, type, loc);
}
@@ -3267,7 +3445,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx,
if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
return false;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
return convert_args(ctx, params, type, loc);
}
@@ -3334,7 +3512,7 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx,
static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx,
const struct hlsl_type *type, enum hlsl_base_type base_type)
{
- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy);
+ return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy);
}
static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params,
@@ -3855,7 +4033,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx,
if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc)))
return false;
- dim = min(type->dimx, type->dimy);
+ dim = min(type->e.numeric.dimx, type->e.numeric.dimy);
if (dim == 1)
return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc);
@@ -3939,7 +4117,7 @@ static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *
return false;
type = params->args[0]->data_type;
if (!(type->class == HLSL_CLASS_SCALAR
- || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4)))
+ || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 4)))
{
struct vkd3d_string_buffer *string;
if ((string = hlsl_type_to_string(ctx, type)))
@@ -4141,6 +4319,19 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx,
return !!add_user_call(ctx, func, params, false, loc);
}
+static bool intrinsic_isinf(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_type *type = params->args[0]->data_type, *bool_type;
+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
+
+ bool_type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL,
+ type->e.numeric.dimx, type->e.numeric.dimy);
+
+ args[0] = params->args[0];
+ return !!add_expr(ctx, params->instrs, HLSL_OP1_ISINF, args, bool_type, loc);
+}
+
static bool intrinsic_ldexp(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -4371,15 +4562,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx,
if (arg1->data_type->class == HLSL_CLASS_VECTOR)
{
vect_count++;
- cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1);
+ cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->e.numeric.dimx, 1);
}
if (arg2->data_type->class == HLSL_CLASS_VECTOR)
{
vect_count++;
- cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx);
+ cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->e.numeric.dimx);
}
- matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->dimx, cast_type1->dimy);
+ matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->e.numeric.dimx, cast_type1->e.numeric.dimy);
if (vect_count == 0)
{
@@ -4387,12 +4578,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx,
}
else if (vect_count == 1)
{
- VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1);
- ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy);
+ VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 || matrix_type->e.numeric.dimy == 1);
+ ret_type = hlsl_get_vector_type(ctx, base, matrix_type->e.numeric.dimx * matrix_type->e.numeric.dimy);
}
else
{
- VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1);
+ VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 && matrix_type->e.numeric.dimy == 1);
ret_type = hlsl_get_scalar_type(ctx, base);
}
@@ -4406,23 +4597,23 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx,
return false;
hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < matrix_type->dimx; ++i)
+ for (i = 0; i < matrix_type->e.numeric.dimx; ++i)
{
- for (j = 0; j < matrix_type->dimy; ++j)
+ for (j = 0; j < matrix_type->e.numeric.dimy; ++j)
{
struct hlsl_ir_node *instr = NULL;
struct hlsl_block block;
- for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k)
+ for (k = 0; k < cast_type1->e.numeric.dimx && k < cast_type2->e.numeric.dimy; ++k)
{
struct hlsl_ir_node *value1, *value2, *mul;
if (!(value1 = hlsl_add_load_component(ctx, params->instrs,
- cast1, j * cast1->data_type->dimx + k, loc)))
+ cast1, j * cast1->data_type->e.numeric.dimx + k, loc)))
return false;
if (!(value2 = hlsl_add_load_component(ctx, params->instrs,
- cast2, k * cast2->data_type->dimx + i, loc)))
+ cast2, k * cast2->data_type->e.numeric.dimx + i, loc)))
return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc)))
@@ -4439,7 +4630,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx,
}
}
- if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))
+ if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->e.numeric.dimx + i, instr))
return false;
hlsl_block_add_block(params->instrs, &block);
}
@@ -4632,7 +4823,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx,
static const struct hlsl_constant_value zero_value;
struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT,
- arg->data_type->dimx, arg->data_type->dimy);
+ arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy);
if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc)))
return false;
@@ -5086,22 +5277,23 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx,
return true;
}
- mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx);
+ mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->e.numeric.dimy, arg_type->e.numeric.dimx);
if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc)))
return false;
hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < arg_type->dimx; ++i)
+ for (i = 0; i < arg_type->e.numeric.dimx; ++i)
{
- for (j = 0; j < arg_type->dimy; ++j)
+ for (j = 0; j < arg_type->e.numeric.dimy; ++j)
{
struct hlsl_block block;
- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc)))
+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg,
+ j * arg->data_type->e.numeric.dimx + i, loc)))
return false;
- if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load))
+ if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->e.numeric.dimx + j, load))
return false;
hlsl_block_add_block(params->instrs, &block);
}
@@ -5131,7 +5323,8 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx,
struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle;
struct hlsl_type *arg_type = arg->data_type;
- if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4))
+ if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR
+ && arg_type->e.numeric.dimx == 4))
{
struct vkd3d_string_buffer *string;
@@ -5187,6 +5380,185 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx,
return true;
}
+static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name)
+{
+ struct hlsl_ir_node *lhs, *coords, *val, *cmp_val = NULL, *orig_val = NULL;
+ struct hlsl_ir_node *interlocked, *void_ret;
+ struct hlsl_type *lhs_type, *val_type;
+ struct vkd3d_string_buffer *string;
+ struct hlsl_deref dst_deref;
+
+ if (hlsl_version_lt(ctx, 5, 0))
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+ "Interlocked functions can only be used in shader model 5.0 or higher.");
+
+ if (op != HLSL_INTERLOCKED_CMP_EXCH && op != HLSL_INTERLOCKED_EXCH
+ && params->args_count != 2 && params->args_count != 3)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
+ "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.",
+ name, params->args_count);
+ return false;
+ }
+
+ lhs = params->args[0];
+ lhs_type = lhs->data_type;
+
+ if (op == HLSL_INTERLOCKED_CMP_EXCH)
+ {
+ cmp_val = params->args[1];
+ val = params->args[2];
+ if (params->args_count == 4)
+ orig_val = params->args[3];
+ }
+ else
+ {
+ val = params->args[1];
+ if (params->args_count == 3)
+ orig_val = params->args[2];
+ }
+
+ if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT
+ && lhs_type->e.numeric.type != HLSL_TYPE_INT))
+ {
+ if ((string = hlsl_type_to_string(ctx, lhs_type)))
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.",
+ name, string->buffer);
+ hlsl_release_string_buffer(ctx, string);
+ }
+ return false;
+ }
+
+ /* Interlocked*() functions always take uint for the value parameters,
+ * except for InterlockedMax()/InterlockedMin(). */
+ if (op == HLSL_INTERLOCKED_MAX || op == HLSL_INTERLOCKED_MIN)
+ {
+ enum hlsl_base_type val_base_type = val->data_type->e.numeric.type;
+
+ /* Floating values are always cast to signed integers. */
+ if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE)
+ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT);
+ else
+ val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type);
+ }
+ else
+ {
+ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT);
+ }
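+    /* E.g., InterlockedMax(u[0], 1.5f) on a RWBuffer<uint> thus converts the
+     * value to int rather than to the destination's uint, and the comparison
+     * is performed as signed. */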
+
+ if (cmp_val && !(cmp_val = add_implicit_conversion(ctx, params->instrs, cmp_val, val_type, loc)))
+ return false;
+ if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc)))
+ return false;
+
+ /* TODO: groupshared variables */
+ if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs)))
+ {
+ if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs)))
+ {
+ hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets.");
+ return false;
+ }
+
+ if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node))
+ return false;
+ coords = hlsl_ir_index(lhs)->idx.node;
+
+ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR);
+ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT);
+
+ if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements.");
+ return false;
+ }
+ }
+ else
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements.");
+ return false;
+ }
+
+ interlocked = hlsl_new_interlocked(ctx, op, orig_val ? lhs_type : NULL, &dst_deref, coords, cmp_val, val, loc);
+ hlsl_cleanup_deref(&dst_deref);
+ if (!interlocked)
+ return false;
+ hlsl_block_add_instr(params->instrs, interlocked);
+
+ if (orig_val)
+ {
+ if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST)
+ hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST,
+ "Output argument to '%s' is const.", name);
+
+ if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked))
+ return false;
+ }
+
+ if (!(void_ret = hlsl_new_void_expr(ctx, loc)))
+ return false;
+ hlsl_block_add_instr(params->instrs, void_ret);
+
+ return true;
+}
+
+static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, "InterlockedAdd");
+}
+
+static bool intrinsic_InterlockedAnd(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_AND, params, loc, "InterlockedAnd");
+}
+
+static bool intrinsic_InterlockedCompareExchange(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareExchange");
+}
+
+static bool intrinsic_InterlockedCompareStore(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareStore");
+}
+
+static bool intrinsic_InterlockedExchange(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_EXCH, params, loc, "InterlockedExchange");
+}
+
+static bool intrinsic_InterlockedMax(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MAX, params, loc, "InterlockedMax");
+}
+
+static bool intrinsic_InterlockedMin(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MIN, params, loc, "InterlockedMin");
+}
+
+static bool intrinsic_InterlockedOr(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_OR, params, loc, "InterlockedOr");
+}
+
+static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor");
+}
+
static const struct intrinsic_function
{
const char *name;
@@ -5200,6 +5572,15 @@ intrinsic_functions[] =
/* Note: these entries should be kept in alphabetical order. */
{"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4},
{"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount},
+ {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd},
+ {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd},
+ {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange},
+ {"InterlockedCompareStore", 3, true, intrinsic_InterlockedCompareStore},
+ {"InterlockedExchange", 3, true, intrinsic_InterlockedExchange},
+ {"InterlockedMax", -1, true, intrinsic_InterlockedMax},
+ {"InterlockedMin", -1, true, intrinsic_InterlockedMin},
+ {"InterlockedOr", -1, true, intrinsic_InterlockedOr},
+ {"InterlockedXor", -1, true, intrinsic_InterlockedXor},
{"abs", 1, true, intrinsic_abs},
{"acos", 1, true, intrinsic_acos},
{"all", 1, true, intrinsic_all},
@@ -5236,6 +5617,7 @@ intrinsic_functions[] =
{"fmod", 2, true, intrinsic_fmod},
{"frac", 1, true, intrinsic_frac},
{"fwidth", 1, true, intrinsic_fwidth},
+ {"isinf", 1, true, intrinsic_isinf},
{"ldexp", 2, true, intrinsic_ldexp},
{"length", 1, true, intrinsic_length},
{"lerp", 3, true, intrinsic_lerp},
@@ -5447,6 +5829,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type
struct hlsl_ir_load *load;
struct hlsl_ir_var *var;
+ if (!hlsl_is_numeric_type(type))
+ {
+ struct vkd3d_string_buffer *string;
+
+ if ((string = hlsl_type_to_string(ctx, type)))
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Constructor data type %s is not numeric.", string->buffer);
+ hlsl_release_string_buffer(ctx, string);
+ return NULL;
+ }
+
if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc)))
return NULL;
@@ -5483,6 +5876,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
hlsl_error(ctx, &cond->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Ternary condition type '%s' is not numeric.", string->buffer);
hlsl_release_string_buffer(ctx, string);
+ return false;
}
if (first->data_type->class <= HLSL_CLASS_LAST_NUMERIC
@@ -5491,21 +5885,22 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
if (!(common_type = get_common_numeric_type(ctx, first, second, &first->loc)))
return false;
- if (cond_type->dimx == 1 && cond_type->dimy == 1)
+ if (cond_type->e.numeric.dimx == 1 && cond_type->e.numeric.dimy == 1)
{
cond_type = hlsl_get_numeric_type(ctx, common_type->class,
- HLSL_TYPE_BOOL, common_type->dimx, common_type->dimy);
+ HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy);
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
}
else
{
- if (common_type->dimx == 1 && common_type->dimy == 1)
+ if (common_type->e.numeric.dimx == 1 && common_type->e.numeric.dimy == 1)
{
common_type = hlsl_get_numeric_type(ctx, cond_type->class,
- common_type->e.numeric.type, cond_type->dimx, cond_type->dimy);
+ common_type->e.numeric.type, cond_type->e.numeric.dimx, cond_type->e.numeric.dimy);
}
- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy)
+ else if (cond_type->e.numeric.dimx != common_type->e.numeric.dimx
+ || cond_type->e.numeric.dimy != common_type->e.numeric.dimy)
{
/* This condition looks wrong but is correct.
* floatN is compatible with float1xN, but not with floatNx1. */
@@ -5523,7 +5918,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
}
cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL,
- common_type->dimx, common_type->dimy);
+ common_type->e.numeric.dimx, common_type->e.numeric.dimy);
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
}
@@ -5551,7 +5946,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
}
cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
- cond_type->dimx, cond_type->dimy);
+ cond_type->e.numeric.dimx, cond_type->e.numeric.dimy);
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
@@ -5923,7 +6318,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc
return false;
}
- if (read_channel >= object_type->e.resource.format->dimx)
+ if (read_channel >= object_type->e.resource.format->e.numeric.dimx)
{
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Method %s() requires at least %u channels.", name, read_channel + 1);
@@ -5944,6 +6339,87 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc
return true;
}
+static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ const struct hlsl_type *object_type = object->data_type;
+ struct hlsl_resource_load_params load_params = {0};
+ unsigned int sampler_dim, offset_dim;
+ const struct hlsl_type *sampler_type;
+ struct hlsl_ir_node *load;
+
+ sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim);
+ offset_dim = hlsl_offset_dim_count(object_type->sampler_dim);
+
+ if (!strcmp(name, "GatherCmpGreen"))
+ load_params.type = HLSL_RESOURCE_GATHER_CMP_GREEN;
+ else if (!strcmp(name, "GatherCmpBlue"))
+ load_params.type = HLSL_RESOURCE_GATHER_CMP_BLUE;
+ else if (!strcmp(name, "GatherCmpAlpha"))
+ load_params.type = HLSL_RESOURCE_GATHER_CMP_ALPHA;
+ else
+ load_params.type = HLSL_RESOURCE_GATHER_CMP_RED;
+
+ if (!strcmp(name, "GatherCmp") || !offset_dim)
+ {
+ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
+ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.",
+ name, 4 + !!offset_dim, params->args_count);
+ return false;
+ }
+ }
+ else if (params->args_count < 3 || params->args_count == 6 || params->args_count > 8)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
+ "Wrong number of arguments to method '%s': expected 3, 4, 5, 7, or 8, but got %u.",
+ name, params->args_count);
+ return false;
+ }
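+    /* That is: sampler, coords and compare value (3), plus either a single
+     * offset (4) or four programmable offsets (7), each form optionally
+     * followed by a tiled-resource status argument (5 or 8). */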
+
+ if (params->args_count == 5 || params->args_count == 8)
+ {
+ hlsl_fixme(ctx, loc, "Tiled resource status argument.");
+ }
+ else if (offset_dim && params->args_count > 3)
+ {
+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3],
+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc)))
+ return false;
+ }
+
+ sampler_type = params->args[0]->data_type;
+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON)
+ {
+ struct vkd3d_string_buffer *string;
+
+ if ((string = hlsl_type_to_string(ctx, sampler_type)))
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.",
+ name, string->buffer);
+ hlsl_release_string_buffer(ctx, string);
+ return false;
+ }
+
+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1],
+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc)))
+ return false;
+
+ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2],
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc)))
+ return false;
+
+ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4);
+ load_params.resource = object;
+ load_params.sampler = params->args[0];
+
+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc)))
+ return false;
+ hlsl_block_add_instr(block, load);
+ return true;
+}
+
static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest,
struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc)
{
@@ -6311,6 +6787,11 @@ texture_methods[] =
{ "Gather", add_gather_method_call, "00010101001000" },
{ "GatherAlpha", add_gather_method_call, "00010101001000" },
{ "GatherBlue", add_gather_method_call, "00010101001000" },
+ { "GatherCmp", add_gather_cmp_method_call, "00010101001000" },
+ { "GatherCmpAlpha", add_gather_cmp_method_call, "00010101001000" },
+ { "GatherCmpBlue", add_gather_cmp_method_call, "00010101001000" },
+ { "GatherCmpGreen", add_gather_cmp_method_call, "00010101001000" },
+ { "GatherCmpRed", add_gather_cmp_method_call, "00010101001000" },
{ "GatherGreen", add_gather_method_call, "00010101001000" },
{ "GatherRed", add_gather_method_call, "00010101001000" },
@@ -6553,6 +7034,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
struct hlsl_semantic semantic;
enum hlsl_buffer_type buffer_type;
enum hlsl_sampler_dim sampler_dim;
+ enum hlsl_so_object_type so_type;
struct hlsl_attribute *attr;
struct parse_attribute_list attr_list;
struct hlsl_ir_switch_case *switch_case;
@@ -6596,6 +7078,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%token KW_INLINE
%token KW_INOUT
%token KW_LINEAR
+%token KW_LINESTREAM
%token KW_MATRIX
%token KW_NAMESPACE
%token KW_NOINTERPOLATION
@@ -6605,6 +7088,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%token KW_PACKOFFSET
%token KW_PASS
%token KW_PIXELSHADER
+%token KW_POINTSTREAM
%token KW_RASTERIZERORDEREDBUFFER
%token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER
%token KW_RASTERIZERORDEREDTEXTURE1D
@@ -6654,6 +7138,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%token KW_TEXTURE3D
%token KW_TEXTURECUBE
%token KW_TEXTURECUBEARRAY
+%token KW_TRIANGLESTREAM
%token KW_TRUE
%token KW_TYPEDEF
%token KW_UNSIGNED
@@ -6784,6 +7269,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%type <semantic> semantic
+%type <so_type> so_type
+
%type <state_block> state_block
%type <state_block_index> state_block_index_opt
@@ -7684,7 +8171,10 @@ parameter_decl:
{
hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Implicit size arrays not allowed in function parameters.");
+ type = ctx->builtin_types.error;
+ break;
}
+
type = hlsl_new_array_type(ctx, type, $4.sizes[i]);
}
vkd3d_free($4.sizes);
@@ -7805,6 +8295,20 @@ rov_type:
$$ = HLSL_SAMPLER_DIM_3D;
}
+so_type:
+ KW_POINTSTREAM
+ {
+ $$ = HLSL_STREAM_OUTPUT_POINT_STREAM;
+ }
+ | KW_LINESTREAM
+ {
+ $$ = HLSL_STREAM_OUTPUT_LINE_STREAM;
+ }
+ | KW_TRIANGLESTREAM
+ {
+ $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM;
+ }
+
resource_format:
var_modifiers type
{
@@ -7948,6 +8452,10 @@ type_no_void:
validate_uav_type(ctx, $1, $3, &@4);
$$ = hlsl_new_uav_type(ctx, $1, $3, true);
}
+ | so_type '<' type '>'
+ {
+ $$ = hlsl_new_stream_output_type(ctx, $1, $3);
+ }
| KW_RWBYTEADDRESSBUFFER
{
$$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false);
@@ -8088,14 +8596,9 @@ typedef:
}
if (modifiers)
- {
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
"Storage modifiers are not allowed on typedefs.");
- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, $4, struct parse_variable_def, entry)
- vkd3d_free(v);
- vkd3d_free($4);
- YYABORT;
- }
+
if (!add_typedef(ctx, type, $4))
YYABORT;
}
@@ -8753,25 +9256,25 @@ if_body:
loop_statement:
attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement
{
- $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
| attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';'
{
- $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
| attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement
{
- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
| attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement
{
- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
@@ -8979,17 +9482,24 @@ primary_expr:
struct hlsl_ir_load *load;
struct hlsl_ir_var *var;
- if (!(var = hlsl_get_var(ctx->cur_scope, $1)))
+ if ((var = hlsl_get_var(ctx->cur_scope, $1)))
+ {
+ vkd3d_free($1);
+
+ if (!(load = hlsl_new_var_load(ctx, var, &@1)))
+ YYABORT;
+ if (!($$ = make_block(ctx, &load->node)))
+ YYABORT;
+ }
+ else
{
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1);
vkd3d_free($1);
- YYABORT;
+
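+            /* Recover with a block carrying the error expression, so parsing
+             * continues instead of aborting on the first undefined name. */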
+ if (!($$ = make_empty_block(ctx)))
+ YYABORT;
+ $$->value = ctx->error_instr;
}
- vkd3d_free($1);
- if (!(load = hlsl_new_var_load(ctx, var, &@1)))
- YYABORT;
- if (!($$ = make_block(ctx, &load->node)))
- YYABORT;
}
| '(' expr ')'
{
@@ -9149,23 +9659,8 @@ postfix_expr:
| var_modifiers type '(' initializer_expr_list ')'
{
if ($1)
- {
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
"Modifiers are not allowed on constructors.");
- free_parse_initializer(&$4);
- YYABORT;
- }
- if (!hlsl_is_numeric_type($2))
- {
- struct vkd3d_string_buffer *string;
-
- if ((string = hlsl_type_to_string(ctx, $2)))
- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
- "Constructor data type %s is not numeric.", string->buffer);
- hlsl_release_string_buffer(ctx, string);
- free_parse_initializer(&$4);
- YYABORT;
- }
if (!($$ = add_constructor(ctx, $2, &$4, &@2)))
{
@@ -9233,11 +9728,8 @@ unary_expr:
| '(' var_modifiers type arrays ')' unary_expr
{
if ($2)
- {
hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
"Modifiers are not allowed on casts.");
- YYABORT;
- }
if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3))
{
@@ -9381,10 +9873,7 @@ assignment_expr:
struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3);
if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST)
- {
hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression.");
- YYABORT;
- }
hlsl_block_add_block($3, $1);
destroy_block($1);
if (!add_assignment(ctx, $3, lhs, $2, rhs))
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index d11ff481f6b..8d817b051ce 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -19,9 +19,14 @@
*/
#include "hlsl.h"
+#include "vkd3d_shader_private.h"
+#include "d3dcommon.h"
#include <stdio.h>
#include <math.h>
+/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */
+#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2
+
/* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */
static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct hlsl_type *type, struct hlsl_ir_node *base_offset, struct hlsl_ir_node *idx,
@@ -269,7 +274,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls
if (ctx->profile->major_version < 4)
return true;
- if (type1->dimx != type2->dimx)
+ if (type1->e.numeric.dimx != type2->e.numeric.dimx)
return false;
return base_type_get_semantic_equivalent(type1->e.numeric.type)
@@ -291,6 +296,9 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
{
if (!ascii_strcasecmp(ext_var->name, new_name))
{
+ VKD3D_ASSERT(ext_var->data_type->class <= HLSL_CLASS_VECTOR);
+ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR);
+
if (output)
{
if (index >= semantic->reported_duplicated_output_next_index)
@@ -731,6 +739,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr);
return res;
+ case HLSL_IR_INTERLOCKED:
+ res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr);
+ return res;
+
default:
return false;
}
@@ -1031,7 +1043,7 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *
static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index,
const struct vkd3d_shader_location *loc)
{
- unsigned int dim_count = index->data_type->dimx;
+ unsigned int dim_count = index->data_type->e.numeric.dimx;
struct hlsl_ir_node *store, *zero;
struct hlsl_ir_load *coords_load;
struct hlsl_deref coords_deref;
@@ -1075,7 +1087,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
struct hlsl_deref var_deref;
struct hlsl_type *matrix_type;
struct hlsl_ir_var *var;
- unsigned int x, y, k, i;
+ unsigned int k, i;
if (instr->type != HLSL_IR_SWIZZLE)
return false;
@@ -1088,14 +1100,12 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
return false;
hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < instr->data_type->dimx; ++i)
+ for (i = 0; i < instr->data_type->e.numeric.dimx; ++i)
{
struct hlsl_block store_block;
struct hlsl_ir_node *load;
- y = (swizzle->swizzle >> (8 * i + 4)) & 0xf;
- x = (swizzle->swizzle >> 8 * i) & 0xf;
- k = y * matrix_type->dimx + x;
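+        /* E.g., for a float4x4, component (x = 1, y = 2) reads scalar
+         * 2 * 4 + 1 = 9 of the source matrix. */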
+ k = swizzle->u.matrix.components[i].y * matrix_type->e.numeric.dimx + swizzle->u.matrix.components[i].x;
if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc)))
return false;
@@ -1140,7 +1150,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR);
VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT);
- VKD3D_ASSERT(coords->data_type->dimx == dim_count);
+ VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count);
if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc)))
return false;
@@ -1176,7 +1186,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
return false;
hlsl_init_simple_deref_from_var(&row_deref, var);
- for (i = 0; i < mat->data_type->dimx; ++i)
+ for (i = 0; i < mat->data_type->e.numeric.dimx; ++i)
{
struct hlsl_ir_node *c;
@@ -1225,7 +1235,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s
src_type = cast->operands[0].node->data_type;
dst_type = cast->node.data_type;
- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1)
+ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1)
{
struct hlsl_ir_node *new_cast, *swizzle;
@@ -1236,9 +1246,10 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s
return false;
hlsl_block_add_instr(block, new_cast);
- if (dst_type->dimx != 1)
+ if (dst_type->e.numeric.dimx != 1)
{
- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, new_cast, &cast->node.loc)))
+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X),
+ dst_type->e.numeric.dimx, new_cast, &cast->node.loc)))
return false;
hlsl_block_add_instr(block, swizzle);
}
@@ -1358,8 +1369,10 @@ struct copy_propagation_var_def
struct copy_propagation_state
{
- struct rb_tree var_defs;
- struct copy_propagation_state *parent;
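+    /* Flat stack of scopes, innermost last; each tree maps variables to
+     * their tracked values. A non-NULL "stop" names an instruction at which
+     * the pass halts, recorded in "stopped". */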
+ struct rb_tree *scope_var_defs;
+ size_t scope_count, scopes_capacity;
+ struct hlsl_ir_node *stop;
+ bool stopped;
};
static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry)
@@ -1381,6 +1394,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte
vkd3d_free(var_def);
}
+static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx)
+{
+ if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity,
+ state->scope_count + 1, sizeof(*state->scope_var_defs))))
+ return false;
+
+ rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare);
+
+ return state->scope_count;
+}
+
+static size_t copy_propagation_pop_scope(struct copy_propagation_state *state)
+{
+ rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL);
+
+ return state->scope_count;
+}
+
+static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx)
+{
+ memset(state, 0, sizeof(*state));
+
+ return copy_propagation_push_scope(state, ctx);
+}
+
+static void copy_propagation_state_destroy(struct copy_propagation_state *state)
+{
+ while (copy_propagation_pop_scope(state));
+
+ vkd3d_free(state->scope_var_defs);
+}
+
static struct copy_propagation_value *copy_propagation_get_value_at_time(
struct copy_propagation_component_trace *trace, unsigned int time)
{
@@ -1398,9 +1443,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time(
static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state,
const struct hlsl_ir_var *var, unsigned int component, unsigned int time)
{
- for (; state; state = state->parent)
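+    /* Walk the scope stack innermost-first; when the unsigned index wraps
+     * past zero it fails the "i < scope_count" test and the loop ends. */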
+ for (size_t i = state->scope_count - 1; i < state->scope_count; i--)
{
- struct rb_entry *entry = rb_get(&state->var_defs, var);
+ struct rb_tree *tree = &state->scope_var_defs[i];
+ struct rb_entry *entry = rb_get(tree, var);
if (entry)
{
struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
@@ -1426,7 +1472,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co
static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx,
struct copy_propagation_state *state, struct hlsl_ir_var *var)
{
- struct rb_entry *entry = rb_get(&state->var_defs, var);
+ struct rb_tree *tree = &state->scope_var_defs[state->scope_count - 1];
+ struct rb_entry *entry = rb_get(tree, var);
struct copy_propagation_var_def *var_def;
unsigned int component_count = hlsl_type_component_count(var->data_type);
int res;
@@ -1439,7 +1486,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h
var_def->var = var;
- res = rb_put(&state->var_defs, var, &var_def->entry);
+ res = rb_put(tree, var, &var_def->entry);
VKD3D_ASSERT(!res);
return var_def;
@@ -1596,7 +1643,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx,
var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count));
return false;
}
- ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i);
+ hlsl_swizzle_set_component(&ret_swizzle, i, value->component);
}
TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n",
@@ -1678,6 +1725,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx,
case HLSL_CLASS_DEPTH_STENCIL_VIEW:
case HLSL_CLASS_GEOMETRY_SHADER:
case HLSL_CLASS_BLEND_STATE:
+ case HLSL_CLASS_STREAM_OUTPUT:
case HLSL_CLASS_NULL:
break;
@@ -1719,10 +1767,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx,
return false;
load = hlsl_ir_load(swizzle->val.node);
- if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node))
+ if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node))
return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node))
+ if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node))
return true;
return false;
@@ -1792,6 +1840,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx,
return progress;
}
+static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx,
+ struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state)
+{
+ bool progress = false;
+
+ progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index);
+ return progress;
+}
+
static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
struct copy_propagation_state *state)
{
@@ -1818,18 +1875,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s
}
}
-static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
- struct copy_propagation_state *parent)
-{
- rb_init(&state->var_defs, copy_propagation_var_def_compare);
- state->parent = parent;
-}
-
-static void copy_propagation_state_destroy(struct copy_propagation_state *state)
-{
- rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL);
-}
-
static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
struct hlsl_block *block, unsigned int time)
{
@@ -1898,16 +1943,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff,
struct copy_propagation_state *state)
{
- struct copy_propagation_state inner_state;
bool progress = false;
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &iff->then_block, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &iff->else_block, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
/* Ideally we'd invalidate the outer state looking at what was
* touched in the two inner states, but this doesn't work for
@@ -1922,14 +1970,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if
static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop,
struct copy_propagation_state *state)
{
- struct copy_propagation_state inner_state;
bool progress = false;
copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index);
+ copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index);
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &loop->body, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
return progress;
}
@@ -1937,15 +1987,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l
static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s,
struct copy_propagation_state *state)
{
- struct copy_propagation_state inner_state;
struct hlsl_ir_switch_case *c;
bool progress = false;
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
{
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &c->body, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &c->body, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
}
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
@@ -1964,6 +2015,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
{
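+        /* A caller may set 'stop' to halt the analysis at a given
+         * instruction, so that the propagation state at that exact point can
+         * be inspected. */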
+ if (instr == state->stop)
+ {
+ state->stopped = true;
+ return progress;
+ }
+
switch (instr->type)
{
case HLSL_IR_LOAD:
@@ -1998,9 +2055,15 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state);
break;
+ case HLSL_IR_INTERLOCKED:
+ progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
+                break;
+
default:
break;
}
+
+ if (state->stopped)
+ return progress;
}
return progress;
@@ -2013,7 +2076,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc
index_instructions(block, 2);
- copy_propagation_state_init(ctx, &state, NULL);
+ copy_propagation_state_init(&state, ctx);
progress = copy_propagation_transform_block(ctx, block, &state);
@@ -2053,10 +2116,10 @@ static enum validation_result validate_component_index_range_from_deref(struct h
switch (type->class)
{
case HLSL_CLASS_VECTOR:
- if (idx >= type->dimx)
+ if (idx >= type->e.numeric.dimx)
{
hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
- "Vector index is out of bounds. %u/%u", idx, type->dimx);
+ "Vector index is out of bounds. %u/%u", idx, type->e.numeric.dimx);
return DEREF_VALIDATION_OUT_OF_BOUNDS;
}
break;
@@ -2178,6 +2241,24 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
validate_component_index_range_from_deref(ctx, &store->lhs);
break;
}
+ case HLSL_IR_INTERLOCKED:
+ {
+ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
+
+ if (!interlocked->dst.var->is_uniform)
+ {
+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
+ "Accessed resource must have a single uniform source.");
+ }
+ else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT)
+ {
+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
+ "Accessed resource from \"%s\" must be determinable at compile time.",
+ interlocked->dst.var->name);
+ note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource");
+ }
+ break;
+ }
default:
break;
}
@@ -2187,7 +2268,7 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
static bool is_vec1(const struct hlsl_type *type)
{
- return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1);
+ return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1);
}
static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
@@ -2364,18 +2445,20 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
src_type = cast->operands[0].node->data_type;
dst_type = cast->node.data_type;
- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx)
+ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR
+ && dst_type->e.numeric.dimx < src_type->e.numeric.dimx)
{
struct hlsl_ir_node *new_cast, *swizzle;
- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx);
+ dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx);
/* We need to preserve the cast since it might be doing more than just
* narrowing the vector. */
if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc)))
return false;
hlsl_block_add_instr(block, new_cast);
- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc)))
+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W),
+ dst_type->e.numeric.dimx, new_cast, &cast->node.loc)))
return false;
hlsl_block_add_instr(block, swizzle);
@@ -2401,11 +2484,12 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
struct hlsl_ir_node *new_swizzle;
uint32_t combined_swizzle;
- combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle,
- swizzle->swizzle, instr->data_type->dimx);
+ combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector,
+ swizzle->u.vector, instr->data_type->e.numeric.dimx);
next_instr = hlsl_ir_swizzle(next_instr)->val.node;
- if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc)))
+ if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle,
+ instr->data_type->e.numeric.dimx, next_instr, &instr->loc)))
return false;
list_add_before(&instr->entry, &new_swizzle->entry);
@@ -2425,11 +2509,11 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i
return false;
swizzle = hlsl_ir_swizzle(instr);
- if (instr->data_type->dimx != swizzle->val.node->data_type->dimx)
+ if (instr->data_type->e.numeric.dimx != swizzle->val.node->data_type->e.numeric.dimx)
return false;
- for (i = 0; i < instr->data_type->dimx; ++i)
- if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i)
+ for (i = 0; i < instr->data_type->e.numeric.dimx; ++i)
+ if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i)
return false;
hlsl_replace_node(instr, swizzle->val.node);
@@ -2589,6 +2673,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir
if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT)
{
struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0};
+ unsigned int width = type->e.numeric.dimx;
struct hlsl_constant_value value;
struct hlsl_ir_load *vector_load;
enum hlsl_ir_expr_op op;
@@ -2597,7 +2682,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir
return false;
hlsl_block_add_instr(block, &vector_load->node);
- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc)))
+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc)))
return false;
hlsl_block_add_instr(block, swizzle);
@@ -2605,14 +2690,14 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir
value.u[1].u = 1;
value.u[2].u = 2;
value.u[3].u = 3;
- if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc)))
+ if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc)))
return false;
hlsl_block_add_instr(block, c);
operands[0] = swizzle;
operands[1] = c;
if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands,
- hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc)))
+ hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, width), &instr->loc)))
return false;
hlsl_block_add_instr(block, eq);
@@ -2621,7 +2706,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir
hlsl_block_add_instr(block, eq);
op = HLSL_OP2_DOT;
- if (type->dimx == 1)
+ if (width == 1)
op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL;
/* Note: We may be creating a DOT for bool vectors here, which we need to lower to
@@ -2748,7 +2833,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n
return false;
hlsl_block_add_instr(block, equals);
- if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc)))
+ if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X),
+ var->data_type->e.numeric.dimx, equals, &cut_index->loc)))
return false;
hlsl_block_add_instr(block, equals);
@@ -2788,6 +2874,116 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n
return true;
}
+
+static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type)
+{
+ struct hlsl_type *sampler_type;
+
+ if (type->class == HLSL_CLASS_ARRAY)
+ {
+ if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type)))
+ return NULL;
+
+ return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count);
+ }
+
+ return ctx->builtin_types.sampler[type->sampler_dim];
+}
+
+static bool deref_offset_is_zero(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
+{
+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
+ unsigned int index;
+
+ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index))
+ return false;
+ return index == 0;
+}
+
+/* Lower samples from separate texture and sampler variables to samples from
+ * synthesized combined samplers. That is, translate SM4-style samples in the
+ * source to SM1-style samples in the bytecode. */
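+/* For instance, a sample from a texture "tex" using a sampler "samp" becomes
+ * a sample from a synthetic combined sampler named "samp+tex". */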
+static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+ struct hlsl_ir_var *var, *resource, *sampler;
+ struct hlsl_ir_resource_load *load;
+ struct vkd3d_string_buffer *name;
+ struct hlsl_type *sampler_type;
+
+ if (instr->type != HLSL_IR_RESOURCE_LOAD)
+ return false;
+ load = hlsl_ir_resource_load(instr);
+
+ if (load->load_type != HLSL_RESOURCE_SAMPLE
+ && load->load_type != HLSL_RESOURCE_SAMPLE_GRAD
+ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD
+ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS)
+ return false;
+
+ if (!load->sampler.var)
+ return false;
+ resource = load->resource.var;
+ sampler = load->sampler.var;
+
+ VKD3D_ASSERT(hlsl_type_is_resource(resource->data_type));
+ VKD3D_ASSERT(hlsl_type_is_resource(sampler->data_type));
+ if (sampler->data_type->class == HLSL_CLASS_ARRAY && !deref_offset_is_zero(ctx, &load->sampler))
+ {
+ /* Not supported by d3dcompiler. */
+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED,
+ "Lower separated samples with sampler arrays.");
+ return false;
+ }
+ if (!resource->is_uniform)
+ return false;
+    if (!sampler->is_uniform)
+ return false;
+
+ if (!(name = hlsl_get_string_buffer(ctx)))
+ return false;
+ vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name);
+
+ if (load->texel_offset.node)
+ {
+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+ "Texel offsets are not supported on profiles lower than 4.0.\n");
+ return false;
+ }
+
+ TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer));
+
+ if (!(var = hlsl_get_var(ctx->globals, name->buffer)))
+ {
+ if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type)))
+ {
+ hlsl_release_string_buffer(ctx, name);
+ return false;
+ }
+
+ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, sampler_type, &instr->loc, false)))
+ {
+ hlsl_release_string_buffer(ctx, name);
+ return false;
+ }
+ var->storage_modifiers |= HLSL_STORAGE_UNIFORM;
+ var->is_combined_sampler = true;
+    var->is_uniform = true;
+
+ list_remove(&var->scope_entry);
+ list_add_after(&sampler->scope_entry, &var->scope_entry);
+
+ list_add_after(&sampler->extern_entry, &var->extern_entry);
+ }
+ hlsl_release_string_buffer(ctx, name);
+
+ /* Only change the deref's var, keep the path. */
+ load->resource.var = var;
+ hlsl_cleanup_deref(&load->sampler);
+ load->sampler.var = NULL;
+
+ return true;
+}
+
/* Lower combined samples and sampler variables to synthesized separated textures and samplers.
* That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */
static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
@@ -2808,6 +3004,10 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
case HLSL_RESOURCE_GATHER_GREEN:
case HLSL_RESOURCE_GATHER_BLUE:
case HLSL_RESOURCE_GATHER_ALPHA:
+ case HLSL_RESOURCE_GATHER_CMP_RED:
+ case HLSL_RESOURCE_GATHER_CMP_GREEN:
+ case HLSL_RESOURCE_GATHER_CMP_BLUE:
+ case HLSL_RESOURCE_GATHER_CMP_ALPHA:
case HLSL_RESOURCE_RESINFO:
case HLSL_RESOURCE_SAMPLE_CMP:
case HLSL_RESOURCE_SAMPLE_CMP_LZ:
@@ -2899,6 +3099,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl
list_add_tail(list, &to_add->extern_entry);
}
+static bool sort_synthetic_combined_samplers_first(struct hlsl_ctx *ctx)
+{
+    struct list combined_samplers;
+    struct hlsl_ir_var *var, *next;
+
+    list_init(&combined_samplers);
+
+    LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        if (var->is_combined_sampler)
+        {
+            list_remove(&var->extern_entry);
+            insert_ensuring_decreasing_bind_count(&combined_samplers, var, HLSL_REGSET_SAMPLERS);
+        }
+    }
+
+    list_move_head(&ctx->extern_vars, &combined_samplers);
+
+ return false;
+}
+
static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx)
{
struct list separated_resources;
@@ -2920,11 +3141,24 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx)
return false;
}
-/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). */
+/* Turn CAST to int or uint as follows:
+ *
+ * CAST(x) = x - FRACT(x) + extra
+ *
+ * where
+ *
+ * extra = FRACT(x) > 0 && x < 0
+ *
+ * and the comparisons in the extra term are performed using CMP or SLT
+ * depending on whether this is a pixel or vertex shader, respectively.
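+ *
+ * For example, for x = -2.5 we have FRACT(x) = 0.5, so x - FRACT(x) = -3.0
+ * and extra = 1, which gives -2.0, i.e. truncation towards zero.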
+ *
+ * A REINTERPRET (which is written as a mere MOV) is also applied to the final
+ * result for type consistency.
+ */
static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 };
- struct hlsl_ir_node *arg, *floor, *res;
+ struct hlsl_ir_node *arg, *res;
struct hlsl_ir_expr *expr;
if (instr->type != HLSL_IR_EXPR)
@@ -2939,12 +3173,83 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF)
return false;
- if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, floor);
+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+ {
+ struct hlsl_ir_node *fract, *neg_fract, *has_fract, *floor, *extra, *zero, *one;
+ struct hlsl_constant_value zero_value, one_value;
+
+ memset(&zero_value, 0, sizeof(zero_value));
+ if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, zero);
+
+ one_value.u[0].f = 1.0;
+ one_value.u[1].f = 1.0;
+ one_value.u[2].f = 1.0;
+ one_value.u[3].f = 1.0;
+ if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, one);
+
+ if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, fract);
+
+ if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg_fract);
+
+ if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one)))
+ return false;
+ hlsl_block_add_instr(block, has_fract);
+
+ if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, has_fract)))
+ return false;
+ hlsl_block_add_instr(block, extra);
+
+ if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract)))
+ return false;
+ hlsl_block_add_instr(block, floor);
+
+ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, floor, extra)))
+ return false;
+ hlsl_block_add_instr(block, res);
+ }
+ else
+ {
+ struct hlsl_ir_node *neg_arg, *is_neg, *fract, *neg_fract, *has_fract, *floor;
+
+ if (!(neg_arg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg_arg);
+
+ if (!(is_neg = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg, neg_arg)))
+ return false;
+ hlsl_block_add_instr(block, is_neg);
+
+ if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, fract);
+
+ if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg_fract);
+
+ if (!(has_fract = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg_fract, fract)))
+ return false;
+ hlsl_block_add_instr(block, has_fract);
+
+ if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract)))
+ return false;
+ hlsl_block_add_instr(block, floor);
+
+ if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor)))
+ return false;
+ hlsl_block_add_instr(block, res);
+ }
memset(operands, 0, sizeof(operands));
- operands[0] = floor;
+ operands[0] = res;
if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc)))
return false;
hlsl_block_add_instr(block, res);
@@ -3010,7 +3315,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h
arg2 = expr->operands[1].node;
if (expr->op != HLSL_OP2_DOT)
return false;
- if (arg1->data_type->dimx != 2)
+ if (arg1->data_type->e.numeric.dimx != 2)
return false;
if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
@@ -3034,11 +3339,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h
return false;
hlsl_block_add_instr(block, mul);
- if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc)))
+ if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X),
+ instr->data_type->e.numeric.dimx, mul, &expr->node.loc)))
return false;
hlsl_block_add_instr(block, add_x);
- if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc)))
+ if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y),
+ instr->data_type->e.numeric.dimx, mul, &expr->node.loc)))
return false;
hlsl_block_add_instr(block, add_y);
@@ -3202,7 +3509,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
type = arg->data_type;
/* Reduce the range of the input angles to [-pi, pi]. */
- for (i = 0; i < type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimx; ++i)
{
half_value.u[i].f = 0.5;
two_pi_value.u[i].f = 2.0 * M_PI;
@@ -3230,7 +3537,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
return false;
hlsl_block_add_instr(block, reduced);
- if (type->dimx == 1)
+ if (type->e.numeric.dimx == 1)
{
if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc)))
return false;
@@ -3243,7 +3550,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
struct hlsl_deref var_deref;
struct hlsl_ir_load *var_load;
- for (i = 0; i < type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimx; ++i)
{
uint32_t s = hlsl_swizzle_from_writemask(1 << i);
@@ -3256,7 +3563,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
return false;
hlsl_init_simple_deref_from_var(&var_deref, var);
- for (i = 0; i < type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimx; ++i)
{
struct hlsl_block store_block;
@@ -3292,7 +3599,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st
return false;
arg = expr->operands[0].node;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx);
     /* If this happens, it means we failed to cast the argument to boolean somewhere. */
VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL);
@@ -3354,7 +3661,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL);
type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT,
- instr->data_type->dimx, instr->data_type->dimy);
+ instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy);
if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc)))
return false;
@@ -3375,6 +3682,51 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
return true;
}
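+/* Lower SAMPLE_LOD and SAMPLE_LOD_BIAS so that the LOD or bias value is
+ * packed into the otherwise unused components of a synthetic float4
+ * coordinate vector; sm1 texldl/texldb read it from the .w component. */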
+static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+ struct hlsl_ir_node *swizzle, *store;
+ struct hlsl_ir_resource_load *load;
+ struct hlsl_ir_load *tmp_load;
+ struct hlsl_ir_var *tmp_var;
+ struct hlsl_deref deref;
+
+ if (instr->type != HLSL_IR_RESOURCE_LOAD)
+ return false;
+ load = hlsl_ir_resource_load(instr);
+ if (load->load_type != HLSL_RESOURCE_SAMPLE_LOD
+ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS)
+ return false;
+
+ if (!load->lod.node)
+ return false;
+
+ if (!(tmp_var = hlsl_new_synthetic_var(ctx, "coords-with-lod",
+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &instr->loc)))
+ return false;
+
+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc)))
+ return false;
+ list_add_before(&instr->entry, &swizzle->entry);
+
+ if (!(store = hlsl_new_simple_store(ctx, tmp_var, swizzle)))
+ return false;
+ list_add_before(&instr->entry, &store->entry);
+
+ hlsl_init_simple_deref_from_var(&deref, tmp_var);
+ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load->coords.node, 0, &instr->loc)))
+ return false;
+ list_add_before(&instr->entry, &store->entry);
+
+ if (!(tmp_load = hlsl_new_var_load(ctx, tmp_var, &instr->loc)))
+ return false;
+ list_add_before(&instr->entry, &tmp_load->node.entry);
+
+ hlsl_src_remove(&load->coords);
+ hlsl_src_from_node(&load->coords, &tmp_load->node);
+ hlsl_src_remove(&load->lod);
+ return true;
+}
+
static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
struct hlsl_block *block)
{
@@ -3393,7 +3745,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node
arg1 = expr->operands[0].node;
arg2 = expr->operands[1].node;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
return false;
@@ -3519,7 +3871,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h
arg1 = expr->operands[0].node;
arg2 = expr->operands[1].node;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
return false;
@@ -3579,7 +3931,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h
if (expr->op != HLSL_OP3_CMP)
return false;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
for (i = 0; i < 3; ++i)
{
@@ -3649,7 +4001,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
return false;
/* Narrowing casts should have already been lowered. */
- VKD3D_ASSERT(type->dimx == arg_type->dimx);
+ VKD3D_ASSERT(type->e.numeric.dimx == arg_type->e.numeric.dimx);
zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc);
if (!zero)
@@ -3675,7 +4027,8 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc
if (cond_type->e.numeric.type != HLSL_TYPE_BOOL)
{
- cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy);
+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
+ cond_type->e.numeric.dimx, cond_type->e.numeric.dimy);
if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc)))
return NULL;
@@ -3711,13 +4064,13 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
return false;
if (type->e.numeric.type != HLSL_TYPE_INT)
return false;
- utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
+ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2)))
return false;
hlsl_block_add_instr(block, xor);
- for (i = 0; i < type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimx; ++i)
high_bit_value.u[i].u = 0x80000000;
if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc)))
return false;
@@ -3777,9 +4130,9 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
return false;
if (type->e.numeric.type != HLSL_TYPE_INT)
return false;
- utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
+ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
- for (i = 0; i < type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimx; ++i)
high_bit_value.u[i].u = 0x80000000;
if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc)))
return false;
@@ -3870,8 +4223,8 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
{
arg1 = expr->operands[0].node;
arg2 = expr->operands[1].node;
- VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx);
- dimx = arg1->data_type->dimx;
+ VKD3D_ASSERT(arg1->data_type->e.numeric.dimx == arg2->data_type->e.numeric.dimx);
+ dimx = arg1->data_type->e.numeric.dimx;
is_bool = type->e.numeric.type == HLSL_TYPE_BOOL;
if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2)))
@@ -3920,7 +4273,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
return false;
if (type->e.numeric.type != HLSL_TYPE_FLOAT)
return false;
- btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
+ btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy);
if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1)))
return false;
@@ -3942,7 +4295,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2)))
return false;
- for (i = 0; i < type->dimx; ++i)
+ for (i = 0; i < type->e.numeric.dimx; ++i)
one_value.u[i].f = 1.0f;
if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc)))
return false;
@@ -4000,7 +4353,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
if (!arg)
continue;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx);
if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc)))
return false;
hlsl_block_add_instr(block, arg_cast);
@@ -4008,7 +4361,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
operands[i] = arg_cast;
}
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc)))
return false;
hlsl_block_add_instr(block, float_expr);
@@ -4049,7 +4402,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
operands[0] = jump->condition.node;
operands[1] = zero;
- cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy);
+ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL,
+ arg_type->e.numeric.dimx, arg_type->e.numeric.dimy);
if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc)))
return false;
hlsl_block_add_instr(&block, cmp);
@@ -4093,7 +4447,7 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
return false;
cond = jump->condition.node;
- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx);
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->e.numeric.dimx);
hlsl_block_init(&block);
@@ -4158,13 +4512,11 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
case HLSL_IR_LOOP:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
+ case HLSL_IR_INTERLOCKED:
break;
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
vkd3d_unreachable();
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */
- vkd3d_unreachable();
}
return false;
@@ -4304,9 +4656,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
vkd3d_unreachable();
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */
- vkd3d_unreachable();
case HLSL_IR_STORE:
{
@@ -4410,6 +4759,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
index->idx.node->last_read = last_read;
break;
}
+ case HLSL_IR_INTERLOCKED:
+ {
+ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
+
+ var = interlocked->dst.var;
+ var->last_read = max(var->last_read, last_read);
+ deref_mark_last_read(&interlocked->dst, last_read);
+ interlocked->coords.node->last_read = last_read;
+ interlocked->value.node->last_read = last_read;
+ if (interlocked->cmp_value.node)
+ interlocked->cmp_value.node->last_read = last_read;
+ break;
+ }
case HLSL_IR_JUMP:
{
struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
@@ -4494,6 +4856,9 @@ struct register_allocator
/* Two allocations with different mode can't share the same register. */
int mode;
+ /* If an allocation is VIP, no new allocations can be made in the
+ * register unless they are VIP as well. */
+ bool vip;
} *allocations;
size_t count, capacity;
@@ -4513,7 +4878,7 @@ struct register_allocator
};
static unsigned int get_available_writemask(const struct register_allocator *allocator,
- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode)
+ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip)
{
unsigned int writemask = VKD3DSP_WRITEMASK_ALL;
size_t i;
@@ -4532,6 +4897,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all
writemask &= ~allocation->writemask;
if (allocation->mode != mode)
writemask = 0;
+ if (allocation->vip && !vip)
+ writemask = 0;
}
if (!writemask)
@@ -4542,7 +4909,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all
}
static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx,
- unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode)
+ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip)
{
struct allocation *allocation;
@@ -4556,16 +4923,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a
allocation->first_write = first_write;
allocation->last_read = last_read;
allocation->mode = mode;
+ allocation->vip = vip;
allocator->reg_count = max(allocator->reg_count, reg_idx + 1);
}
-/* reg_size is the number of register components to be reserved, while component_count is the number
- * of components for the register's writemask. In SM1, floats and vectors allocate the whole
- * register, even if they don't use it completely. */
+/* Allocates a register (or some components of it) within the register allocator.
+ * 'reg_size' is the number of register components to be reserved.
+ * 'component_count' is the number of components for the hlsl_reg's
+ * writemask, which can be smaller than 'reg_size'. For instance, sm1
+ * floats and vectors allocate the whole register even if they are not
+ * using all components.
+ * 'mode' can be provided to avoid allocating on a register that already has an
+ * allocation with a different mode.
+ * 'force_align' can be used so that the allocation always start in '.x'.
+ * 'vip' can be used so that no new allocations can be made in the given register
+ * unless they are 'vip' as well. */
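+/* For instance, a float2 semantic allocated without packing uses 'reg_size' 4
+ * and 'component_count' 2: the whole register is reserved, but the writemask
+ * only covers '.xy'. Sysvals such as SV_PrimitiveID are allocated as 'vip',
+ * which keeps later non-vip allocations out of their register. */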
static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator,
unsigned int first_write, unsigned int last_read, unsigned int reg_size,
- unsigned int component_count, int mode, bool force_align)
+ unsigned int component_count, int mode, bool force_align, bool vip)
{
struct hlsl_reg ret = {.allocation_size = 1, .allocated = true};
unsigned int required_size = force_align ? 4 : reg_size;
@@ -4579,7 +4955,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx)
{
unsigned int available_writemask = get_available_writemask(allocator,
- first_write, last_read, reg_idx, mode);
+ first_write, last_read, reg_idx, mode, vip);
if (vkd3d_popcount(available_writemask) >= pref)
{
@@ -4589,7 +4965,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
ret.id = reg_idx;
ret.writemask = hlsl_combine_writemasks(writemask,
vkd3d_write_mask_from_component_count(component_count));
- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode);
+
+ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip);
return ret;
}
}
@@ -4598,13 +4975,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
ret.id = allocator->reg_count;
ret.writemask = vkd3d_write_mask_from_component_count(component_count);
record_allocation(ctx, allocator, allocator->reg_count,
- vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode);
+ vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip);
return ret;
}
/* Allocate a register with writemask, while reserving reg_writemask. */
-static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator,
- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode)
+static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx,
+ struct register_allocator *allocator, unsigned int first_write, unsigned int last_read,
+ uint32_t reg_writemask, uint32_t writemask, int mode, bool vip)
{
struct hlsl_reg ret = {0};
uint32_t reg_idx;
@@ -4614,11 +4992,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct
for (reg_idx = 0;; ++reg_idx)
{
if ((get_available_writemask(allocator, first_write, last_read,
- reg_idx, mode) & reg_writemask) == reg_writemask)
+ reg_idx, mode, vip) & reg_writemask) == reg_writemask)
break;
}
- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode);
+ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip);
ret.id = reg_idx;
ret.allocation_size = 1;
@@ -4628,7 +5006,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct
}
static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write,
- unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode)
+ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip)
{
unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1;
unsigned int writemask;
@@ -4636,18 +5014,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig
for (i = 0; i < (reg_size / 4); ++i)
{
- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode);
+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip);
if (writemask != VKD3DSP_WRITEMASK_ALL)
return false;
}
- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode);
+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip);
if ((writemask & last_reg_mask) != last_reg_mask)
return false;
return true;
}
static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
- unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode)
+ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip)
{
struct hlsl_reg ret = {0};
uint32_t reg_idx;
@@ -4655,15 +5033,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo
for (reg_idx = 0;; ++reg_idx)
{
- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode))
+ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip))
break;
}
for (i = 0; i < reg_size / 4; ++i)
- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode);
+ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip);
if (reg_size % 4)
record_allocation(ctx, allocator, reg_idx + (reg_size / 4),
- (1u << (reg_size % 4)) - 1, first_write, last_read, mode);
+ (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip);
ret.id = reg_idx;
ret.allocation_size = align(reg_size, 4) / 4;
@@ -4679,9 +5057,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx,
/* FIXME: We could potentially pack structs or arrays more efficiently... */
if (type->class <= HLSL_CLASS_VECTOR)
- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false);
+ return allocate_register(ctx, allocator, first_write, last_read,
+ type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false);
else
- return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0);
+ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false);
}
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type)
@@ -4804,6 +5183,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource);
break;
+ case HLSL_IR_INTERLOCKED:
+ register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst);
+ break;
+
default:
break;
}
@@ -4859,8 +5242,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
}
if (reg_writemask)
- instr->reg = allocate_register_with_masks(ctx, allocator,
- instr->index, instr->last_read, reg_writemask, dst_writemask, 0);
+ instr->reg = allocate_register_with_masks(ctx, allocator, instr->index,
+ instr->last_read, reg_writemask, dst_writemask, 0, false);
else
instr->reg = allocate_numeric_registers_for_type(ctx, allocator,
instr->index, instr->last_read, instr->data_type);
@@ -5006,13 +5389,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
VKD3D_ASSERT(hlsl_is_numeric_type(type));
- VKD3D_ASSERT(type->dimy == 1);
+ VKD3D_ASSERT(type->e.numeric.dimy == 1);
VKD3D_ASSERT(constant->reg.writemask);
for (x = 0, i = 0; x < 4; ++x)
{
const union hlsl_constant_value_component *value;
- float f;
+ float f = 0;
if (!(constant->reg.writemask & (1u << x)))
continue;
@@ -5040,9 +5423,6 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
case HLSL_TYPE_DOUBLE:
FIXME("Double constant.\n");
return;
-
- default:
- vkd3d_unreachable();
}
record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc);
@@ -5084,7 +5464,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
}
}
-static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort)
+static void sort_uniform_by_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort, enum hlsl_regset regset)
{
struct hlsl_ir_var *var;
@@ -5092,8 +5472,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_
LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
{
- uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC];
- uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC];
+ uint32_t to_sort_size = to_sort->bind_count[regset];
+ uint32_t var_size = var->bind_count[regset];
if (to_sort_size > var_size)
{
@@ -5105,7 +5485,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_
list_add_tail(sorted, &to_sort->extern_entry);
}
-static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx)
+static void sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset regset)
{
struct list sorted = LIST_INIT(sorted);
struct hlsl_ir_var *var, *next;
@@ -5113,7 +5493,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
{
if (var->is_uniform)
- sort_uniform_by_numeric_bind_count(&sorted, var);
+ sort_uniform_by_bind_count(&sorted, var, regset);
}
list_move_tail(&ctx->extern_vars, &sorted);
}
@@ -5161,7 +5541,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
struct register_allocator allocator = {0};
struct hlsl_ir_var *var;
- sort_uniforms_by_numeric_bind_count(ctx);
+ sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
{
@@ -5181,14 +5561,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
{
if (i < bind_count)
{
- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL)
+ if (get_available_writemask(&allocator_used, 1, UINT_MAX,
+ reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL)
{
hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
"Overlapping register() reservations on 'c%u'.", reg_idx + i);
}
- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0);
+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
}
- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0);
+ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
}
var->regs[HLSL_REGSET_NUMERIC].id = reg_idx;
@@ -5211,7 +5592,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
{
- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0);
+ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false);
TRACE("Allocated %s to %s.\n", var->name,
debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
}
@@ -5254,7 +5635,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
var = entry_func->parameters.vars[i];
if (var->is_output_semantic)
{
- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0);
+ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL,
+ var->first_write, var->last_read, 0, false);
break;
}
}
@@ -5266,7 +5648,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
return allocator.reg_count;
}
-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers)
+static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type,
+ unsigned int storage_modifiers)
{
unsigned int i;
@@ -5311,6 +5694,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
enum vkd3d_shader_register_type type;
struct vkd3d_shader_version version;
+ bool special_interpolation = false;
+ bool vip_allocation = false;
uint32_t reg;
bool builtin;
@@ -5363,6 +5748,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
* domains, it is allocated as if it was 'float[1]'. */
var->force_align = true;
}
+
+ if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX
+ || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX
+ || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID)
+ vip_allocation = true;
+
+ if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX)
+ special_interpolation = true;
}
if (builtin)
@@ -5374,10 +5767,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
{
int mode = (ctx->profile->major_version < 4)
? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
- unsigned int reg_size = optimize ? var->data_type->dimx : 4;
+ unsigned int reg_size = optimize ? var->data_type->e.numeric.dimx : 4;
+
+ if (special_interpolation)
+ mode = VKD3DSIM_NONE;
- var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1,
- UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align);
+ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX,
+ reg_size, var->data_type->e.numeric.dimx, mode, var->force_align, vip_allocation);
TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v',
var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode);
@@ -5831,7 +6227,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl
switch (type->class)
{
case HLSL_CLASS_VECTOR:
- if (idx >= type->dimx)
+ if (idx >= type->e.numeric.dimx)
return false;
*start += idx;
break;
@@ -5840,9 +6236,9 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl
if (idx >= hlsl_type_major_size(type))
return false;
if (hlsl_type_is_row_major(type))
- *start += idx * type->dimx;
+ *start += idx * type->e.numeric.dimx;
else
- *start += idx * type->dimy;
+ *start += idx * type->e.numeric.dimy;
break;
case HLSL_CLASS_ARRAY:
@@ -6419,6 +6815,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
{
progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL);
+ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL);
progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL);
progress |= hlsl_copy_propagation_execute(ctx, body);
progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
@@ -6430,8 +6827,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program,
struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var)
{
+ enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID;
enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE;
- enum vkd3d_shader_component_type component_type;
unsigned int register_index, mask, use_mask;
const char *name = var->semantic.name;
enum vkd3d_shader_register_type type;
@@ -6451,7 +6848,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx))
{
register_index = has_idx ? var->semantic.index : ~0u;
- mask = (1u << var->data_type->dimx) - 1;
+ mask = (1u << var->data_type->e.numeric.dimx) - 1;
}
else
{
@@ -6478,12 +6875,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
component_type = VKD3D_SHADER_COMPONENT_UINT;
break;
- default:
+ case HLSL_TYPE_DOUBLE:
if ((string = hlsl_type_to_string(ctx, var->data_type)))
hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Invalid data type %s for semantic variable %s.", string->buffer, var->name);
hlsl_release_string_buffer(ctx, string);
- component_type = VKD3D_SHADER_COMPONENT_VOID;
break;
}
@@ -6519,19 +6915,19 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
sysval = VKD3D_SHADER_SV_POSITION;
}
- mask = (1 << var->data_type->dimx) - 1;
+ mask = (1 << var->data_type->e.numeric.dimx) - 1;
if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output
&& program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
{
- if (var->data_type->dimx > 1)
+ if (var->data_type->e.numeric.dimx > 1)
hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
"PSIZE output must have only 1 component in this shader model.");
/* For some reason the writemask has all components set. */
mask = VKD3DSP_WRITEMASK_ALL;
}
if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3
- && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1)
+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->e.numeric.dimx > 1)
hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
"FOG output must have only 1 component in this shader model.");
@@ -6636,7 +7032,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d
swizzle = hlsl_swizzle_from_writemask(src_writemask);
swizzle = hlsl_map_swizzle(swizzle, dst_writemask);
- swizzle = vsir_swizzle_from_hlsl(swizzle);
return swizzle;
}
@@ -6726,7 +7121,8 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx,
break;
case HLSL_SAMPLER_DIM_GENERIC:
- /* These can appear in sm4-style combined sample instructions. */
+ /* These can appear in sm4-style separate sample
+ * instructions that haven't been lowered. */
hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered.");
continue;
@@ -6812,7 +7208,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src
}
static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
- struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask)
+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask)
{
struct hlsl_ir_constant *constant;
@@ -6821,7 +7217,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
/* In SM4 constants are inlined */
constant = hlsl_ir_constant(instr);
vsir_src_from_hlsl_constant_value(src, ctx, &constant->value,
- vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask);
+ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->e.numeric.dimx, map_writemask);
}
else
{
@@ -6832,29 +7228,265 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
}
}
-static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
- struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
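+/* Set up a vsir register from a numeric deref: indexable temporaries become
+ * 'x#' registers with a second, possibly relative, index, while plain
+ * temporaries fold the constant deref offset into the register index. */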
+static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
{
- VKD3D_ASSERT(instr->reg.allocated);
- vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
- dst->reg.idx[0].offset = instr->reg.id;
- dst->reg.dimension = VSIR_DIMENSION_VEC4;
- dst->write_mask = instr->reg.writemask;
-}
+ const struct hlsl_ir_var *var = deref->var;
+ unsigned int offset_const_deref;
-static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
- struct vsir_program *program, struct hlsl_ir_constant *constant)
-{
- struct hlsl_ir_node *instr = &constant->node;
- struct vkd3d_shader_dst_param *dst_param;
- struct vkd3d_shader_src_param *src_param;
- struct vkd3d_shader_instruction *ins;
+ reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
+ reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
+ reg->dimension = VSIR_DIMENSION_VEC4;
- VKD3D_ASSERT(instr->reg.allocated);
- VKD3D_ASSERT(constant->reg.allocated);
+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
- return;
+ if (!var->indexable)
+ {
+ offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
+ reg->idx[0].offset += offset_const_deref / 4;
+ reg->idx_count = 1;
+ }
+ else
+ {
+ offset_const_deref = deref->const_offset;
+ reg->idx[1].offset = offset_const_deref / 4;
+ reg->idx_count = 2;
+
+ if (deref->rel_offset.node)
+ {
+ struct vkd3d_shader_src_param *idx_src;
+
+ if (!(idx_src = vsir_program_get_src_params(program, 1)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return false;
+ }
+ memset(idx_src, 0, sizeof(*idx_src));
+ reg->idx[1].rel_addr = idx_src;
+
+ vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL);
+ }
+ }
+
+ *writemask = 0xf & (0xf << (offset_const_deref % 4));
+ if (var->regs[HLSL_REGSET_NUMERIC].writemask)
+ *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
+ return true;
+}
+
+static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
+{
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref);
+ const struct hlsl_ir_var *var = deref->var;
+
+ if (var->is_uniform)
+ {
+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
+
+ if (regset == HLSL_REGSET_TEXTURES)
+ {
+ reg->type = VKD3DSPR_RESOURCE;
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ if (vkd3d_shader_ver_ge(version, 5, 1))
+ {
+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
+ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
+ reg->idx_count = 2;
+ }
+ else
+ {
+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index;
+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
+ reg->idx_count = 1;
+ }
+ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES);
+ *writemask = VKD3DSP_WRITEMASK_ALL;
+ }
+ else if (regset == HLSL_REGSET_UAVS)
+ {
+ reg->type = VKD3DSPR_UAV;
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ if (vkd3d_shader_ver_ge(version, 5, 1))
+ {
+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
+ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
+ reg->idx_count = 2;
+ }
+ else
+ {
+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index;
+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
+ reg->idx_count = 1;
+ }
+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
+ *writemask = VKD3DSP_WRITEMASK_ALL;
+ }
+ else if (regset == HLSL_REGSET_SAMPLERS)
+ {
+ reg->type = VKD3DSPR_SAMPLER;
+ reg->dimension = VSIR_DIMENSION_NONE;
+ if (vkd3d_shader_ver_ge(version, 5, 1))
+ {
+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
+ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
+ reg->idx_count = 2;
+ }
+ else
+ {
+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index;
+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
+ reg->idx_count = 1;
+ }
+ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS);
+ *writemask = VKD3DSP_WRITEMASK_ALL;
+ }
+ else
+ {
+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
+
+ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
+ reg->type = VKD3DSPR_CONSTBUFFER;
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ if (vkd3d_shader_ver_ge(version, 5, 1))
+ {
+ reg->idx[0].offset = var->buffer->reg.id;
+ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
+ reg->idx[2].offset = offset / 4;
+ reg->idx_count = 3;
+ }
+ else
+ {
+ reg->idx[0].offset = var->buffer->reg.index;
+ reg->idx[1].offset = offset / 4;
+ reg->idx_count = 2;
+ }
+ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3);
+ }
+ }
+ else if (var->is_input_semantic)
+ {
+ bool has_idx;
+
+ if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx))
+ {
+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+
+ if (has_idx)
+ {
+ reg->idx[0].offset = var->semantic.index + offset / 4;
+ reg->idx_count = 1;
+ }
+
+ if (shader_sm4_is_scalar_register(reg))
+ reg->dimension = VSIR_DIMENSION_SCALAR;
+ else
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4);
+ }
+ else
+ {
+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+ VKD3D_ASSERT(hlsl_reg.allocated);
+
+ if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
+ reg->type = VKD3DSPR_PATCHCONST;
+ else
+ reg->type = VKD3DSPR_INPUT;
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ reg->idx[0].offset = hlsl_reg.id;
+ reg->idx_count = 1;
+ *writemask = hlsl_reg.writemask;
+ }
+ }
+ else if (var->is_output_semantic)
+ {
+ bool has_idx;
+
+ if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx))
+ {
+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+
+ if (has_idx)
+ {
+ reg->idx[0].offset = var->semantic.index + offset / 4;
+ reg->idx_count = 1;
+ }
+
+ if (shader_sm4_is_scalar_register(reg))
+ reg->dimension = VSIR_DIMENSION_SCALAR;
+ else
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4);
+ }
+ else
+ {
+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+ VKD3D_ASSERT(hlsl_reg.allocated);
+ reg->type = VKD3DSPR_OUTPUT;
+ reg->dimension = VSIR_DIMENSION_VEC4;
+ reg->idx[0].offset = hlsl_reg.id;
+ reg->idx_count = 1;
+ *writemask = hlsl_reg.writemask;
+ }
+ }
+ else
+ {
+ return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref);
+ }
+ return true;
+}
+
+static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+ struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref,
+ unsigned int dst_writemask, const struct vkd3d_shader_location *loc)
+{
+ uint32_t writemask;
+
+ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref))
+ return false;
+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
+ return true;
+}
+
+static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+ struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref,
+ const struct vkd3d_shader_location *loc, unsigned int writemask)
+{
+ uint32_t reg_writemask;
+
+ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref))
+ return false;
+ dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask);
+ return true;
+}
+
+static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
+{
+ VKD3D_ASSERT(instr->reg.allocated);
+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
+ dst->reg.idx[0].offset = instr->reg.id;
+ dst->reg.dimension = VSIR_DIMENSION_VEC4;
+ dst->write_mask = instr->reg.writemask;
+}
+
+static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_constant *constant)
+{
+ struct hlsl_ir_node *instr = &constant->node;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_src_param *src_param;
+ struct vkd3d_shader_instruction *ins;
+
+ VKD3D_ASSERT(instr->reg.allocated);
+ VKD3D_ASSERT(constant->reg.allocated);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
+ return;
src_param = &ins->src[0];
vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
@@ -7014,7 +7646,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
dst_type = instr->data_type;
/* Narrowing casts were already lowered. */
- VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
+ VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx);
switch (dst_type->e.numeric.type)
{
@@ -7040,9 +7672,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"The 'double' type is not supported for the %s profile.", ctx->profile->name);
break;
-
- default:
- vkd3d_unreachable();
}
break;
@@ -7059,19 +7688,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
+ case HLSL_TYPE_BOOL:
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
return true;
- case HLSL_TYPE_BOOL:
- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer.");
- break;
-
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer.");
break;
-
- default:
- vkd3d_unreachable();
}
break;
@@ -7096,7 +7719,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_BOOL:
/* Casts to bool should have already been lowered. */
- default:
hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.",
debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type));
break;
@@ -7178,7 +7800,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP2_DOT:
- switch (expr->operands[0].node->data_type->dimx)
+ switch (expr->operands[0].node->data_type->e.numeric.dimx)
{
case 3:
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
@@ -7276,7 +7898,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
register_index = reg.id;
}
else
- writemask = (1u << deref->var->data_type->dimx) - 1;
+ writemask = (1u << deref->var->data_type->e.numeric.dimx) - 1;
if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE")
|| (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3)))
@@ -7334,7 +7956,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
if (sm1_register_from_semantic_name(&version, deref->var->semantic.name,
deref->var->semantic.index, false, &type, &register_index))
{
- writemask = (1 << deref->var->data_type->dimx) - 1;
+ writemask = (1 << deref->var->data_type->e.numeric.dimx) - 1;
}
else
{
@@ -7472,9 +8094,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
dst_param->write_mask = instr->reg.writemask;
swizzle = hlsl_swizzle_from_writemask(val->reg.writemask);
- swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx);
+ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx);
swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask);
- swizzle = vsir_swizzle_from_hlsl(swizzle);
src_param = &ins->src[0];
VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT);
@@ -7539,7 +8160,7 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program
hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches.");
return;
}
- VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1);
+ VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1);
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2)))
return;
@@ -7624,31 +8245,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
}
static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
- uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab)
+ uint64_t config_flags, struct vsir_program *program)
{
struct vkd3d_shader_version version = {0};
- struct vkd3d_bytecode_buffer buffer = {0};
struct hlsl_block block;
version.major = ctx->profile->major_version;
version.minor = ctx->profile->minor_version;
version.type = ctx->profile->type;
- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
{
ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
return;
}
- write_sm1_uniforms(ctx, &buffer);
- if (buffer.status)
- {
- vkd3d_free(buffer.data);
- ctx->result = buffer.status;
- return;
- }
- ctab->code = buffer.data;
- ctab->size = buffer.size;
-
generate_vsir_signature(ctx, program, entry_func);
hlsl_block_init(&block);
@@ -7659,80 +8269,448 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
sm1_generate_vsir_block(ctx, &entry_func->body, program);
}
-static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block)
+D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
{
- struct vkd3d_shader_location *loc;
- struct hlsl_ir_node *vsir_instr;
-
- loc = &program->instructions.elements[program->instructions.count - 1].location;
-
- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc)))
+ switch (type->class)
{
- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
- return;
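+ /* Arrays are reported with the class of their element type; the element
+ * count is written separately in the type record. */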
+ case HLSL_CLASS_ARRAY:
+ return hlsl_sm1_class(type->e.array.type);
+ case HLSL_CLASS_MATRIX:
+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
+ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
+ return D3DXPC_MATRIX_COLUMNS;
+ else
+ return D3DXPC_MATRIX_ROWS;
+ case HLSL_CLASS_SCALAR:
+ return D3DXPC_SCALAR;
+ case HLSL_CLASS_STRUCT:
+ return D3DXPC_STRUCT;
+ case HLSL_CLASS_VECTOR:
+ return D3DXPC_VECTOR;
+ case HLSL_CLASS_PIXEL_SHADER:
+ case HLSL_CLASS_SAMPLER:
+ case HLSL_CLASS_STRING:
+ case HLSL_CLASS_TEXTURE:
+ case HLSL_CLASS_VERTEX_SHADER:
+ return D3DXPC_OBJECT;
+ case HLSL_CLASS_DEPTH_STENCIL_STATE:
+ case HLSL_CLASS_DEPTH_STENCIL_VIEW:
+ case HLSL_CLASS_EFFECT_GROUP:
+ case HLSL_CLASS_ERROR:
+ case HLSL_CLASS_PASS:
+ case HLSL_CLASS_RASTERIZER_STATE:
+ case HLSL_CLASS_RENDER_TARGET_VIEW:
+ case HLSL_CLASS_TECHNIQUE:
+ case HLSL_CLASS_UAV:
+ case HLSL_CLASS_VOID:
+ case HLSL_CLASS_CONSTANT_BUFFER:
+ case HLSL_CLASS_COMPUTE_SHADER:
+ case HLSL_CLASS_DOMAIN_SHADER:
+ case HLSL_CLASS_HULL_SHADER:
+ case HLSL_CLASS_GEOMETRY_SHADER:
+ case HLSL_CLASS_BLEND_STATE:
+ case HLSL_CLASS_STREAM_OUTPUT:
+ case HLSL_CLASS_NULL:
+ break;
}
- hlsl_block_add_instr(block, vsir_instr);
+
+ vkd3d_unreachable();
}
-static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx,
- struct vsir_program *program, struct hlsl_ir_node *instr)
+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler)
{
- struct vkd3d_shader_location *loc;
- struct hlsl_ir_node *vsir_instr;
+ enum hlsl_type_class class = type->class;
- loc = &program->instructions.elements[program->instructions.count - 1].location;
+ if (is_combined_sampler)
+ class = HLSL_CLASS_TEXTURE;
- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx,
- program->instructions.count - 1, instr->data_type, &instr->reg, loc)))
+ switch (class)
{
- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
- return;
- }
+ case HLSL_CLASS_SCALAR:
+ case HLSL_CLASS_VECTOR:
+ case HLSL_CLASS_MATRIX:
+ switch (type->e.numeric.type)
+ {
+ case HLSL_TYPE_BOOL:
+ return D3DXPT_BOOL;
+ /* Actually double behaves differently depending on DLL version:
+ * For <= 36, it maps to D3DXPT_FLOAT.
+ * For 37-40, it maps to zero (D3DXPT_VOID).
+ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_*
+ * values are mostly compatible with D3DXPT_*).
+ * However, the latter two cases look like bugs, and a reasonable
+ * application certainly wouldn't know what to do with them.
+ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */
+ case HLSL_TYPE_DOUBLE:
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_HALF:
+ return D3DXPT_FLOAT;
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ return D3DXPT_INT;
+ }
+ break;
- list_add_before(&instr->entry, &vsir_instr->entry);
- hlsl_replace_node(instr, vsir_instr);
-}
+ case HLSL_CLASS_SAMPLER:
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ return D3DXPT_SAMPLER1D;
+ case HLSL_SAMPLER_DIM_2D:
+ return D3DXPT_SAMPLER2D;
+ case HLSL_SAMPLER_DIM_3D:
+ return D3DXPT_SAMPLER3D;
+ case HLSL_SAMPLER_DIM_CUBE:
+ return D3DXPT_SAMPLERCUBE;
+ case HLSL_SAMPLER_DIM_GENERIC:
+ return D3DXPT_SAMPLER;
+ default:
+ ERR("Invalid dimension %#x.\n", type->sampler_dim);
+ vkd3d_unreachable();
+ }
+ break;
-static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program,
- const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block,
- const struct vkd3d_shader_location *loc)
-{
- const struct vkd3d_shader_version *version = &program->shader_version;
- const bool output = var->is_output_semantic;
- enum vkd3d_shader_sysval_semantic semantic;
- struct vkd3d_shader_dst_param *dst_param;
- struct vkd3d_shader_instruction *ins;
- enum vkd3d_shader_register_type type;
- enum vkd3d_shader_opcode opcode;
- unsigned int idx = 0;
- uint32_t write_mask;
- bool has_idx;
+ case HLSL_CLASS_TEXTURE:
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ return D3DXPT_TEXTURE1D;
+ case HLSL_SAMPLER_DIM_2D:
+ return D3DXPT_TEXTURE2D;
+ case HLSL_SAMPLER_DIM_3D:
+ return D3DXPT_TEXTURE3D;
+ case HLSL_SAMPLER_DIM_CUBE:
+ return D3DXPT_TEXTURECUBE;
+ case HLSL_SAMPLER_DIM_GENERIC:
+ return D3DXPT_TEXTURE;
+ default:
+ ERR("Invalid dimension %#x.\n", type->sampler_dim);
+ vkd3d_unreachable();
+ }
+ break;
- sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping,
- ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
- if (semantic == ~0u)
- semantic = VKD3D_SHADER_SV_NONE;
+ case HLSL_CLASS_ARRAY:
+ return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler);
- if (var->is_input_semantic)
- {
- switch (semantic)
- {
- case VKD3D_SHADER_SV_NONE:
- opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
- ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT;
- break;
+ case HLSL_CLASS_STRUCT:
+ return D3DXPT_VOID;
- case VKD3D_SHADER_SV_INSTANCE_ID:
- case VKD3D_SHADER_SV_IS_FRONT_FACE:
- case VKD3D_SHADER_SV_PRIMITIVE_ID:
- case VKD3D_SHADER_SV_SAMPLE_INDEX:
- case VKD3D_SHADER_SV_VERTEX_ID:
- opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
- ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV;
- break;
+ case HLSL_CLASS_STRING:
+ return D3DXPT_STRING;
- default:
- opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
+ case HLSL_CLASS_PIXEL_SHADER:
+ return D3DXPT_PIXELSHADER;
+
+ case HLSL_CLASS_VERTEX_SHADER:
+ return D3DXPT_VERTEXSHADER;
+
+ case HLSL_CLASS_DEPTH_STENCIL_STATE:
+ case HLSL_CLASS_DEPTH_STENCIL_VIEW:
+ case HLSL_CLASS_EFFECT_GROUP:
+ case HLSL_CLASS_ERROR:
+ case HLSL_CLASS_PASS:
+ case HLSL_CLASS_RASTERIZER_STATE:
+ case HLSL_CLASS_RENDER_TARGET_VIEW:
+ case HLSL_CLASS_TECHNIQUE:
+ case HLSL_CLASS_UAV:
+ case HLSL_CLASS_VOID:
+ case HLSL_CLASS_CONSTANT_BUFFER:
+ case HLSL_CLASS_COMPUTE_SHADER:
+ case HLSL_CLASS_DOMAIN_SHADER:
+ case HLSL_CLASS_HULL_SHADER:
+ case HLSL_CLASS_GEOMETRY_SHADER:
+ case HLSL_CLASS_BLEND_STATE:
+ case HLSL_CLASS_STREAM_OUTPUT:
+ case HLSL_CLASS_NULL:
+ break;
+ }
+
+ vkd3d_unreachable();
+}
+
+static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer,
+ struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start)
+{
+ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
+ unsigned int array_size = hlsl_get_multiarray_size(type);
+ struct hlsl_struct_field *field;
+ size_t i;
+
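+ /* Each type is written at most once; a non-zero bytecode_offset means
+ * the record was already emitted and can be reused. */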
+ if (type->bytecode_offset)
+ return;
+
+ if (array_type->class == HLSL_CLASS_STRUCT)
+ {
+ unsigned int field_count = array_type->e.record.field_count;
+ size_t fields_offset;
+
+ for (i = 0; i < field_count; ++i)
+ {
+ field = &array_type->e.record.fields[i];
+ field->name_bytecode_offset = put_string(buffer, field->name);
+ write_sm1_type(buffer, field->type, false, ctab_start);
+ }
+
+ fields_offset = bytecode_align(buffer) - ctab_start;
+
+ for (i = 0; i < field_count; ++i)
+ {
+ field = &array_type->e.record.fields[i];
+ put_u32(buffer, field->name_bytecode_offset - ctab_start);
+ put_u32(buffer, field->type->bytecode_offset - ctab_start);
+ }
+
+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3DXPC_STRUCT, D3DXPT_VOID));
+ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type)));
+ put_u32(buffer, vkd3d_make_u32(array_size, field_count));
+ put_u32(buffer, fields_offset);
+ }
+ else
+ {
+ type->bytecode_offset = put_u32(buffer,
+ vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler)));
+ if (hlsl_is_numeric_type(array_type))
+ put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx));
+ else
+ put_u32(buffer, vkd3d_make_u32(1, 1));
+ put_u32(buffer, vkd3d_make_u32(array_size, 0));
+ put_u32(buffer, 1);
+ }
+}
+
+static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort)
+{
+ struct hlsl_ir_var *var;
+
+ list_remove(&to_sort->extern_entry);
+
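+ /* Insertion sort: walk the sorted list and insert before the first
+ * entry that compares greater by name. */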
+ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
+ {
+ if (strcmp(to_sort->name, var->name) < 0)
+ {
+ list_add_before(&var->extern_entry, &to_sort->extern_entry);
+ return;
+ }
+ }
+
+ list_add_tail(sorted, &to_sort->extern_entry);
+}
+
+static void sm1_sort_externs(struct hlsl_ctx *ctx)
+{
+ struct list sorted = LIST_INIT(sorted);
+ struct hlsl_ir_var *var, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ if (var->is_uniform)
+ sm1_sort_extern(&sorted, var);
+ }
+ list_move_tail(&ctx->extern_vars, &sorted);
+}
+
+static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
+{
+ size_t ctab_start, vars_offset, vars_start, creator_offset, offset;
+ unsigned int uniform_count = 0, r;
+ struct hlsl_ir_var *var;
+
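+ /* First pass: count the uniforms that will appear in the CTAB, and give
+ * uniform parameters the '$' name prefix used in native constant tables. */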
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ for (r = 0; r <= HLSL_REGSET_LAST; ++r)
+ {
+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
+ continue;
+
+ ++uniform_count;
+
+ if (var->is_param && var->is_uniform)
+ {
+ char *new_name;
+
+ if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name)))
+ return;
+ vkd3d_free((char *)var->name);
+ var->name = new_name;
+ }
+ }
+ }
+
+ sm1_sort_externs(ctx);
+
+ ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */
+ creator_offset = put_u32(buffer, 0);
+ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
+ put_u32(buffer, D3DVS_VERSION(ctx->profile->major_version, ctx->profile->minor_version));
+ else
+ put_u32(buffer, D3DPS_VERSION(ctx->profile->major_version, ctx->profile->minor_version));
+ put_u32(buffer, uniform_count);
+ vars_offset = put_u32(buffer, 0);
+ put_u32(buffer, 0); /* FIXME: flags */
+ put_u32(buffer, 0); /* FIXME: target string */
+
+ vars_start = bytecode_align(buffer);
+ set_u32(buffer, vars_offset, vars_start - ctab_start);
+
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ for (r = 0; r <= HLSL_REGSET_LAST; ++r)
+ {
+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
+ continue;
+
+ put_u32(buffer, 0); /* name */
+ if (r == HLSL_REGSET_NUMERIC)
+ {
+ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id));
+ put_u32(buffer, var->bind_count[r]);
+ }
+ else
+ {
+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index));
+ put_u32(buffer, var->bind_count[r]);
+ }
+ put_u32(buffer, 0); /* type */
+ put_u32(buffer, 0); /* default value */
+ }
+ }
+
+ uniform_count = 0;
+
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ for (r = 0; r <= HLSL_REGSET_LAST; ++r)
+ {
+ size_t var_offset, name_offset;
+
+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
+ continue;
+
+ var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t));
+
+ name_offset = put_string(buffer, var->name);
+ set_u32(buffer, var_offset, name_offset - ctab_start);
+
+ write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start);
+ set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
+
+ if (var->default_values)
+ {
+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
+ unsigned int comp_count = hlsl_type_component_count(var->data_type);
+ unsigned int default_value_offset;
+ unsigned int k;
+
+ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t));
+ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start);
+
+ for (k = 0; k < comp_count; ++k)
+ {
+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
+ unsigned int comp_offset;
+ enum hlsl_regset regset;
+
+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
+ if (regset == HLSL_REGSET_NUMERIC)
+ {
+ union
+ {
+ uint32_t u;
+ float f;
+ } uni = {0};
+
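+ /* Default values are stored as 32-bit values: integer and bool values
+ * are converted to float, floats and halves are copied bit-wise, and
+ * doubles are aliased bit-wise only when double_as_float_alias is set. */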
+ switch (comp_type->e.numeric.type)
+ {
+ case HLSL_TYPE_DOUBLE:
+ if (ctx->double_as_float_alias)
+ uni.u = var->default_values[k].number.u;
+ else
+ uni.u = 0;
+ break;
+
+ case HLSL_TYPE_INT:
+ uni.f = var->default_values[k].number.i;
+ break;
+
+ case HLSL_TYPE_UINT:
+ case HLSL_TYPE_BOOL:
+ uni.f = var->default_values[k].number.u;
+ break;
+
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ uni.u = var->default_values[k].number.u;
+ break;
+ }
+
+ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u);
+ }
+ }
+ }
+
+ ++uniform_count;
+ }
+ }
+
+ offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
+ set_u32(buffer, creator_offset, offset - ctab_start);
+}
+
+static void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab)
+{
+ struct vkd3d_bytecode_buffer buffer = {0};
+
+ write_sm1_uniforms(ctx, &buffer);
+ if (buffer.status)
+ {
+ vkd3d_free(buffer.data);
+ ctx->result = buffer.status;
+ return;
+ }
+ ctab->code = buffer.data;
+ ctab->size = buffer.size;
+}
+
+static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program,
+ const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block,
+ const struct vkd3d_shader_location *loc)
+{
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ const bool output = var->is_output_semantic;
+ enum vkd3d_shader_sysval_semantic semantic;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_instruction *ins;
+ enum vkd3d_shader_register_type type;
+ enum vkd3d_shader_opcode opcode;
+ unsigned int idx = 0;
+ uint32_t write_mask;
+ bool has_idx;
+
+ sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping,
+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
+ if (semantic == ~0u)
+ semantic = VKD3D_SHADER_SV_NONE;
+
+ if (var->is_input_semantic)
+ {
+ switch (semantic)
+ {
+ case VKD3D_SHADER_SV_NONE:
+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
+ ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT;
+ break;
+
+ case VKD3D_SHADER_SV_INSTANCE_ID:
+ case VKD3D_SHADER_SV_IS_FRONT_FACE:
+ case VKD3D_SHADER_SV_PRIMITIVE_ID:
+ case VKD3D_SHADER_SV_SAMPLE_INDEX:
+ case VKD3D_SHADER_SV_VERTEX_ID:
+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
+ ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV;
+ break;
+
+ default:
+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV;
break;
}
@@ -7749,7 +8727,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs
{
if (has_idx)
idx = var->semantic.index;
- write_mask = (1u << var->data_type->dimx) - 1;
+ write_mask = (1u << var->data_type->e.numeric.dimx) - 1;
}
else
{
@@ -7806,8 +8784,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs
if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL)
ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
-
- add_last_vsir_instr_to_block(ctx, program, block);
}
static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
@@ -7819,8 +8795,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_
return;
ins->declaration.count = temp_count;
-
- add_last_vsir_instr_to_block(ctx, program, block);
}
static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
@@ -7838,8 +8812,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT;
ins->declaration.indexable_temp.component_count = comp_count;
ins->declaration.indexable_temp.has_function_scope = false;
-
- add_last_vsir_instr_to_block(ctx, program, block);
}
static bool type_is_float(const struct hlsl_type *type)
@@ -7891,7 +8863,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
} one = { .f = 1.0 };
/* Narrowing casts were already lowered. */
- VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
+ VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx);
switch (dst_type->e.numeric.type)
{
@@ -7919,9 +8891,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
return false;
-
- default:
- vkd3d_unreachable();
}
break;
@@ -7945,9 +8914,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
return false;
-
- default:
- vkd3d_unreachable();
}
break;
@@ -7971,9 +8937,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_DOUBLE:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
return false;
-
- default:
- vkd3d_unreachable();
}
break;
@@ -7983,9 +8946,10 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_BOOL:
/* Casts to bool should have already been lowered. */
- default:
- vkd3d_unreachable();
+ break;
}
+
+ vkd3d_unreachable();
}
static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program,
@@ -8040,7 +9004,7 @@ static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx,
value.u[2].f = 1.0f;
value.u[3].f = 1.0f;
vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value,
- VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask);
+ VKD3D_DATA_FLOAT, instr->data_type->e.numeric.dimx, dst_param->write_mask);
vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask);
}
@@ -8270,7 +9234,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
switch (dst_type->e.numeric.type)
{
case HLSL_TYPE_FLOAT:
- switch (expr->operands[0].node->data_type->dimx)
+ switch (expr->operands[0].node->data_type->e.numeric.dimx)
{
case 4:
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
@@ -8505,188 +9469,2042 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
}
}
-static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
+static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_store *store)
{
- struct vkd3d_string_buffer *dst_type_string;
- struct hlsl_ir_node *instr, *next;
- struct hlsl_ir_switch_case *c;
+ struct hlsl_ir_node *instr = &store->node;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_src_param *src_param;
+ struct vkd3d_shader_instruction *ins;
- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
- {
- if (instr->data_type)
- {
- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
- {
- hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
- break;
- }
- }
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
+ return false;
- switch (instr->type)
- {
- case HLSL_IR_CALL:
- vkd3d_unreachable();
+ dst_param = &ins->dst[0];
+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
+ dst_param, &store->lhs, &instr->loc, store->writemask))
+ return false;
- case HLSL_IR_CONSTANT:
- /* In SM4 all constants are inlined. */
- break;
+ src_param = &ins->src[0];
+ vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask);
- case HLSL_IR_EXPR:
- if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type)))
- break;
+ return true;
+}
- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer))
- replace_instr_with_last_vsir_instr(ctx, program, instr);
+/* Does this variable's data come directly from the API user, rather than
+ * being temporary or from a previous shader stage? I.e. is it a uniform or
+ * VS input? */
+static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var)
+{
+ if (var->is_uniform)
+ return true;
- hlsl_release_string_buffer(ctx, dst_type_string);
- break;
+ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX;
+}
- case HLSL_IR_IF:
- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program);
- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program);
- break;
+static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load)
+{
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ const struct hlsl_type *type = load->node.data_type;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct hlsl_ir_node *instr = &load->node;
+ struct vkd3d_shader_instruction *ins;
+ struct hlsl_constant_value value;
- case HLSL_IR_LOOP:
- sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program);
- break;
+ VKD3D_ASSERT(hlsl_is_numeric_type(type));
+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var))
+ {
+ /* Uniform bools can be specified as anything, but internal bools
+ * always have 0 for false and ~0 for true. Normalise that here. */
- case HLSL_IR_SWITCH:
- LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry)
- sm4_generate_vsir_block(ctx, &c->body, program);
- break;
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3)))
+ return false;
+
+ dst_param = &ins->dst[0];
+ vsir_dst_from_hlsl_node(dst_param, ctx, instr);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc))
+ return false;
+
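+ /* Per-component select: where the loaded value is nonzero, take ~0u
+ * (true); elsewhere take 0 (false). */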
+ memset(&value, 0xff, sizeof(value));
+ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value,
+ VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask);
+ memset(&value, 0x00, sizeof(value));
+ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value,
+ VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask);
+ }
+ else
+ {
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
+ return false;
+
+ dst_param = &ins->dst[0];
+ vsir_dst_from_hlsl_node(dst_param, ctx, instr);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc))
+ return false;
+ }
+ return true;
+}
+
+static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_resource_store *store)
+{
+ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource);
+ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node;
+ struct hlsl_ir_node *instr = &store->node;
+ struct vkd3d_shader_instruction *ins;
+ unsigned int writemask;
+
+ if (!store->resource.var->is_uniform)
+ {
+ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable.");
+ return false;
+ }
+
+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+ {
+ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.");
+ return false;
+ }
+
+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+ {
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2)))
+ return false;
+
+ writemask = vkd3d_write_mask_from_component_count(value->data_type->e.numeric.dimx);
+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
+ &ins->dst[0], &store->resource, &instr->loc, writemask))
+ return false;
+ }
+ else
+ {
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2)))
+ return false;
+
+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
+ &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL))
+ return false;
+ }
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
+ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL);
+
+ return true;
+}
+
+static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset)
+{
+ struct hlsl_ir_constant *offset;
+
+ VKD3D_ASSERT(texel_offset);
+ if (texel_offset->type != HLSL_IR_CONSTANT)
+ return false;
+ offset = hlsl_ir_constant(texel_offset);
+
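+ /* AOFFIMMI offsets are encoded as 4-bit signed immediates, hence the
+ * [-8, 7] range for each used component. */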
+ if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7)
+ return false;
+ if (offset->node.data_type->e.numeric.dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7))
+ return false;
+ if (offset->node.data_type->e.numeric.dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7))
+ return false;
+ return true;
+}
+
+static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi(
+ struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset)
+{
+ struct hlsl_ir_constant *offset;
+
+ if (!texel_offset)
+ return;
+ offset = hlsl_ir_constant(texel_offset);
+
+ ins->texel_offset.u = offset->value.u[0].i;
+ ins->texel_offset.v = 0;
+ ins->texel_offset.w = 0;
+ if (offset->node.data_type->e.numeric.dimx > 1)
+ ins->texel_offset.v = offset->value.u[1].i;
+ if (offset->node.data_type->e.numeric.dimx > 2)
+ ins->texel_offset.w = offset->value.u[2].i;
+}
+
+static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_resource_load *load)
+{
+ const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource);
+ bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS);
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER;
+ const struct hlsl_ir_node *sample_index = load->sample_index.node;
+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
+ const struct hlsl_ir_node *coords = load->coords.node;
+ unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL;
+ const struct hlsl_deref *resource = &load->resource;
+ const struct hlsl_ir_node *instr = &load->node;
+ enum hlsl_sampler_dim dim = load->sampling_dim;
+ struct vkd3d_shader_instruction *ins;
+ enum vkd3d_shader_opcode opcode;
+ bool multisampled;
+
+ VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD);
+
+ multisampled = resource_type->class == HLSL_CLASS_TEXTURE
+ && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
+ || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
+
+ if (uav)
+ opcode = VKD3DSIH_LD_UAV_TYPED;
+ else if (raw)
+ opcode = VKD3DSIH_LD_RAW;
+ else
+ opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled)))
+ return false;
+
+ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset))
+ {
+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
+ "Offset must resolve to integer literal in the range -8 to 7.");
+ return false;
+ }
+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset);
+
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+ if (!uav)
+ {
+ /* Mipmap level is in the last component in the IR, but needs to be in
+ * the W component in the instruction. */
+ unsigned int dim_count = hlsl_sampler_dim_count(dim);
+
+ if (dim_count == 1)
+ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3;
+ if (dim_count == 2)
+ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3;
+ }
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc))
+ return false;
+
+ if (multisampled)
+ {
+ if (sample_index->type == HLSL_IR_CONSTANT)
+ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx,
+ &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0);
+ else if (version->major == 4 && version->minor == 0)
+ hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index.");
+ else
+ vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL);
+ }
+ return true;
+}
+
+static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_resource_load *load)
+{
+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
+ const struct hlsl_ir_node *coords = load->coords.node;
+ const struct hlsl_deref *resource = &load->resource;
+ const struct hlsl_deref *sampler = &load->sampler;
+ const struct hlsl_ir_node *instr = &load->node;
+ struct vkd3d_shader_instruction *ins;
+ enum vkd3d_shader_opcode opcode;
+ unsigned int src_count;
+
+ switch (load->load_type)
+ {
+ case HLSL_RESOURCE_SAMPLE:
+ opcode = VKD3DSIH_SAMPLE;
+ src_count = 3;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_CMP:
+ opcode = VKD3DSIH_SAMPLE_C;
+ src_count = 4;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_CMP_LZ:
+ opcode = VKD3DSIH_SAMPLE_C_LZ;
+ src_count = 4;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_LOD:
+ opcode = VKD3DSIH_SAMPLE_LOD;
+ src_count = 4;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
+ opcode = VKD3DSIH_SAMPLE_B;
+ src_count = 4;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_GRAD:
+ opcode = VKD3DSIH_SAMPLE_GRAD;
+ src_count = 5;
+ break;
+
+ default:
+ vkd3d_unreachable();
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
+ return false;
+
+ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset))
+ {
+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
+ "Offset must resolve to integer literal in the range -8 to 7.");
+ return false;
+ }
+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset);
+
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1],
+ resource, ins->dst[0].write_mask, &instr->loc))
+ return false;
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2],
+ sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc))
+ return false;
+
+ if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B)
+ {
+ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL);
+ }
+ else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ)
+ {
+ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL);
+ }
+ else if (opcode == VKD3DSIH_SAMPLE_GRAD)
+ {
+ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL);
+ vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL);
+ }
+ return true;
+}
+
+static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program,
+ const struct hlsl_ir_resource_load *load, uint32_t swizzle, bool compare)
+{
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
+ const struct hlsl_ir_node *coords = load->coords.node;
+ const struct hlsl_deref *resource = &load->resource;
+ enum vkd3d_shader_opcode opcode = VKD3DSIH_GATHER4;
+ const struct hlsl_deref *sampler = &load->sampler;
+ const struct hlsl_ir_node *instr = &load->node;
+ unsigned int src_count = 3, current_arg = 0;
+ struct vkd3d_shader_instruction *ins;
+
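+ /* Offsets that cannot be encoded as an AOFFIMMI literal require the
+ * programmable-offset gather variants (GATHER4_PO*), available from SM5. */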
+ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset))
+ {
+ if (!vkd3d_shader_ver_ge(version, 5, 0))
+ {
+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
+ "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
+ return false;
+ }
+ opcode = VKD3DSIH_GATHER4_PO;
+ ++src_count;
+ }
+
+ if (compare)
+ {
+ opcode = opcode == VKD3DSIH_GATHER4 ? VKD3DSIH_GATHER4_C : VKD3DSIH_GATHER4_PO_C;
+ ++src_count;
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
+ return false;
+
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, coords, VKD3DSP_WRITEMASK_ALL);
+
+ if (opcode == VKD3DSIH_GATHER4_PO || opcode == VKD3DSIH_GATHER4_PO_C)
+ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL);
+ else
+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[current_arg++], resource, ins->dst[0].write_mask, &instr->loc))
+ return false;
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[current_arg], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc))
+ return false;
+ ins->src[current_arg].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[current_arg].swizzle = swizzle;
+ current_arg++;
+
+ if (compare)
+ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, load->cmp.node, VKD3DSP_WRITEMASK_0);
+
+ return true;
+}
+
+static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_resource_load *load)
+{
+ const struct hlsl_deref *resource = &load->resource;
+ const struct hlsl_ir_node *instr = &load->node;
+ struct hlsl_type *type = instr->data_type;
+ struct vkd3d_shader_instruction *ins;
+
+ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1)))
+ return false;
+
+ if (type->e.numeric.type == HLSL_TYPE_UINT)
+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT;
+
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc))
+ return false;
+
+ return true;
+}
+
+static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_resource_load *load)
+{
+ const struct hlsl_deref *resource = &load->resource;
+ const struct hlsl_ir_node *instr = &load->node;
+ struct hlsl_type *type = instr->data_type;
+ struct vkd3d_shader_instruction *ins;
+
+ if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER
+ || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+ {
+ hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers.");
+ return false;
+ }
+
+ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2)))
+ return false;
+
+ if (type->e.numeric.type == HLSL_TYPE_UINT)
+ ins->flags = VKD3DSI_RESINFO_UINT;
+
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL);
+
+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
+ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc))
+ return false;
+
+ return true;
+}
+
+static uint32_t get_gather_swizzle(enum hlsl_resource_load_type type)
+{
+ switch (type)
+ {
+ case HLSL_RESOURCE_GATHER_RED:
+ case HLSL_RESOURCE_GATHER_CMP_RED:
+ return VKD3D_SHADER_SWIZZLE(X, X, X, X);
+
+ case HLSL_RESOURCE_GATHER_GREEN:
+ case HLSL_RESOURCE_GATHER_CMP_GREEN:
+ return VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y);
+
+ case HLSL_RESOURCE_GATHER_BLUE:
+ case HLSL_RESOURCE_GATHER_CMP_BLUE:
+ return VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z);
+
+ case HLSL_RESOURCE_GATHER_ALPHA:
+ case HLSL_RESOURCE_GATHER_CMP_ALPHA:
+ return VKD3D_SHADER_SWIZZLE(W, W, W, W);
+
+ default:
+ return 0;
+ }
+}
+
+static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_resource_load *load)
+{
+ if (load->sampler.var && !load->sampler.var->is_uniform)
+ {
+ hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable.");
+ return false;
+ }
+
+ if (!load->resource.var->is_uniform)
+ {
+ hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable.");
+ return false;
+ }
+
+ switch (load->load_type)
+ {
+ case HLSL_RESOURCE_LOAD:
+ return sm4_generate_vsir_instr_ld(ctx, program, load);
+
+ case HLSL_RESOURCE_SAMPLE:
+ case HLSL_RESOURCE_SAMPLE_CMP:
+ case HLSL_RESOURCE_SAMPLE_CMP_LZ:
+ case HLSL_RESOURCE_SAMPLE_LOD:
+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
+ case HLSL_RESOURCE_SAMPLE_GRAD:
+ /* Combined sample expressions were lowered. */
+ VKD3D_ASSERT(load->sampler.var);
+ return sm4_generate_vsir_instr_sample(ctx, program, load);
+
+ case HLSL_RESOURCE_GATHER_RED:
+ case HLSL_RESOURCE_GATHER_GREEN:
+ case HLSL_RESOURCE_GATHER_BLUE:
+ case HLSL_RESOURCE_GATHER_ALPHA:
+ return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), false);
+
+ case HLSL_RESOURCE_GATHER_CMP_RED:
+ case HLSL_RESOURCE_GATHER_CMP_GREEN:
+ case HLSL_RESOURCE_GATHER_CMP_BLUE:
+ case HLSL_RESOURCE_GATHER_CMP_ALPHA:
+ return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), true);
+
+ case HLSL_RESOURCE_SAMPLE_INFO:
+ return sm4_generate_vsir_instr_sample_info(ctx, program, load);
+
+ case HLSL_RESOURCE_RESINFO:
+ return sm4_generate_vsir_instr_resinfo(ctx, program, load);
+
+ case HLSL_RESOURCE_SAMPLE_PROJ:
+ vkd3d_unreachable();
+
+ default:
+ return false;
+ }
+}
+
+static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_interlocked *interlocked)
+{
+ static const enum vkd3d_shader_opcode opcodes[] =
+ {
+ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD,
+ [HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND,
+ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE,
+ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX,
+ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_ATOMIC_UMIN,
+ [HLSL_INTERLOCKED_OR] = VKD3DSIH_ATOMIC_OR,
+ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_ATOMIC_XOR,
+ };
+
+ static const enum vkd3d_shader_opcode imm_opcodes[] =
+ {
+ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD,
+ [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND,
+ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH,
+ [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH,
+ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX,
+ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_IMM_ATOMIC_UMIN,
+ [HLSL_INTERLOCKED_OR] = VKD3DSIH_IMM_ATOMIC_OR,
+ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_IMM_ATOMIC_XOR,
+ };
+
+ struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node;
+ struct hlsl_ir_node *coords = interlocked->coords.node;
+ struct hlsl_ir_node *instr = &interlocked->node;
+ bool is_imm = interlocked->node.reg.allocated;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_instruction *ins;
+ enum vkd3d_shader_opcode opcode;
+
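+ /* If the result of the interlocked operation is consumed (the node has
+ * a register allocated), use the IMM_ATOMIC_* variants, which
+ * additionally return the original value. */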
+ opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op];
+
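+ /* The opcode tables default to the unsigned min/max variants; switch to
+ * the signed ones for int operands. */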
+ if (value->data_type->e.numeric.type == HLSL_TYPE_INT)
+ {
+ if (opcode == VKD3DSIH_ATOMIC_UMAX)
+ opcode = VKD3DSIH_ATOMIC_IMAX;
+ else if (opcode == VKD3DSIH_ATOMIC_UMIN)
+ opcode = VKD3DSIH_ATOMIC_IMIN;
+ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX)
+ opcode = VKD3DSIH_IMM_ATOMIC_IMAX;
+ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMIN)
+ opcode = VKD3DSIH_IMM_ATOMIC_IMIN;
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode,
+ is_imm ? 2 : 1, cmp_value ? 3 : 2)))
+ return false;
+
+ if (is_imm)
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+ dst_param = is_imm ? &ins->dst[1] : &ins->dst[0];
+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0))
+ return false;
+ dst_param->reg.dimension = VSIR_DIMENSION_NONE;
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
+ if (cmp_value)
+ {
+ vsir_src_from_hlsl_node(&ins->src[1], ctx, cmp_value, VKD3DSP_WRITEMASK_ALL);
+ vsir_src_from_hlsl_node(&ins->src[2], ctx, value, VKD3DSP_WRITEMASK_ALL);
+ }
+ else
+ {
+ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL);
+ }
+
+ return true;
+}
+
+static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_jump *jump)
+{
+ const struct hlsl_ir_node *instr = &jump->node;
+ struct vkd3d_shader_instruction *ins;
+
+ switch (jump->type)
+ {
+ case HLSL_IR_JUMP_BREAK:
+ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0);
+
+ case HLSL_IR_JUMP_CONTINUE:
+ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0);
+
+ case HLSL_IR_JUMP_DISCARD_NZ:
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1)))
+ return false;
+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL);
+ return true;
+
+ case HLSL_IR_JUMP_RETURN:
+ vkd3d_unreachable();
+
+ default:
+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
+ return false;
+ }
+}
+
+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program);
+
+static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff)
+{
+ struct hlsl_ir_node *instr = &iff->node;
+ struct vkd3d_shader_instruction *ins;
+
+ VKD3D_ASSERT(iff->condition.node->data_type->e.numeric.dimx == 1);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1)))
+ return;
+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL);
+
+ sm4_generate_vsir_block(ctx, &iff->then_block, program);
+
+ if (!list_empty(&iff->else_block.instrs))
+ {
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0)))
+ return;
+ sm4_generate_vsir_block(ctx, &iff->else_block, program);
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0)))
+ return;
+}
+
+static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_loop *loop)
+{
+ struct hlsl_ir_node *instr = &loop->node;
+ struct vkd3d_shader_instruction *ins;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0)))
+ return;
+
+ sm4_generate_vsir_block(ctx, &loop->body, program);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0)))
+ return;
+}
+
+static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_switch *swi)
+{
+ const struct hlsl_ir_node *selector = swi->selector.node;
+ struct hlsl_ir_node *instr = &swi->node;
+ struct vkd3d_shader_instruction *ins;
+ struct hlsl_ir_switch_case *cas;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1)))
+ return;
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL);
+
+ LIST_FOR_EACH_ENTRY(cas, &swi->cases, struct hlsl_ir_switch_case, entry)
+ {
+ if (cas->is_default)
+ {
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0)))
+ return;
+ }
+ else
+ {
+ struct hlsl_constant_value value = {.u[0].u = cas->value};
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1)))
+ return;
+ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL);
+ }
+
+ sm4_generate_vsir_block(ctx, &cas->body, program);
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0)))
+ return;
+}
+
+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
+{
+ struct vkd3d_string_buffer *dst_type_string;
+ struct hlsl_ir_node *instr, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
+ {
+ if (instr->data_type)
+ {
+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
+ break;
+ }
+ }
+
+ switch (instr->type)
+ {
+ case HLSL_IR_CALL:
+ vkd3d_unreachable();
+
+ case HLSL_IR_CONSTANT:
+ /* In SM4 all constants are inlined. */
+ break;
+
+ case HLSL_IR_EXPR:
+ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type)))
+ break;
+ sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer);
+ hlsl_release_string_buffer(ctx, dst_type_string);
+ break;
+
+ case HLSL_IR_IF:
+ sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr));
+ break;
+
+ case HLSL_IR_LOAD:
+ sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr));
+ break;
+
+ case HLSL_IR_LOOP:
+ sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr));
+ break;
+
+ case HLSL_IR_RESOURCE_LOAD:
+ sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr));
+ break;
+
+ case HLSL_IR_RESOURCE_STORE:
+ sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr));
+ break;
+
+ case HLSL_IR_JUMP:
+ sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr));
+ break;
+
+ case HLSL_IR_STORE:
+ sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr));
+ break;
+
+ case HLSL_IR_SWITCH:
+ sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr));
+ break;
+
+ case HLSL_IR_SWIZZLE:
+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
+ break;
+
+ case HLSL_IR_INTERLOCKED:
+ sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr));
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
+{
+ bool is_patch_constant_func = func == ctx->patch_constant_func;
+ struct hlsl_block block = {0};
+ struct hlsl_scope *scope;
+ struct hlsl_ir_var *var;
+ uint32_t temp_count;
+
+ compute_liveness(ctx, func);
+ mark_indexable_vars(ctx, func);
+ temp_count = allocate_temp_registers(ctx, func);
+ if (ctx->result)
+ return;
+ program->temp_count = max(program->temp_count, temp_count);
+
+ hlsl_block_init(&block);
+
+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ if ((var->is_input_semantic && var->last_read)
+ || (var->is_output_semantic && var->first_write))
+ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc);
+ }
+
+ if (temp_count)
+ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc);
+
+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
+ {
+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
+ {
+ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
+ continue;
+ if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
+ continue;
+
+ if (var->indexable)
+ {
+ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
+ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
+
+ sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc);
+ }
+ }
+ }
+
+ list_move_head(&func->body.instrs, &block.instrs);
+
+ hlsl_block_cleanup(&block);
+
+ sm4_generate_vsir_block(ctx, &func->body, program);
+
+ generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0);
+}
+
+static int sm4_compare_extern_resources(const void *a, const void *b)
+{
+ const struct extern_resource *aa = a;
+ const struct extern_resource *bb = b;
+ int r;
+
+ if ((r = vkd3d_u32_compare(aa->regset, bb->regset)))
+ return r;
+
+ if ((r = vkd3d_u32_compare(aa->space, bb->space)))
+ return r;
+
+ return vkd3d_u32_compare(aa->index, bb->index);
+}
+
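+/* Skip the synthetic "<resource>" tag that the compiler prefixes to some
+ * internal names; e.g. a hypothetical "<resource>tex0" yields "tex0". */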
+static const char *string_skip_tag(const char *string)
+{
+ if (!strncmp(string, "<resource>", strlen("<resource>")))
+ return string + strlen("<resource>");
+ return string;
+}
+
+static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; ++i)
+ {
+ vkd3d_free(extern_resources[i].name);
+ }
+ vkd3d_free(extern_resources);
+}
+
+static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count)
+{
+ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0;
+ struct extern_resource *extern_resources = NULL;
+ const struct hlsl_ir_var *var;
+ struct hlsl_buffer *buffer;
+ enum hlsl_regset regset;
+ size_t capacity = 0;
+ char *name;
+
+ *count = 0;
+
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ if (separate_components)
+ {
+ unsigned int component_count = hlsl_type_component_count(var->data_type);
+ unsigned int k, regset_offset;
+
+ for (k = 0; k < component_count; ++k)
+ {
+ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k);
+ struct vkd3d_string_buffer *name_buffer;
+
+ if (!hlsl_type_is_resource(component_type))
+ continue;
+
+ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
+ if (regset_offset > var->regs[regset].allocation_size)
+ continue;
+
+ if (!var->objects_usage[regset][regset_offset].used)
+ continue;
+
+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
+ &capacity, *count + 1, sizeof(*extern_resources))))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ return NULL;
+ }
+
+ if (!(name_buffer = hlsl_component_to_string(ctx, var, k)))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ return NULL;
+ }
+ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer))))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ hlsl_release_string_buffer(ctx, name_buffer);
+ return NULL;
+ }
+ hlsl_release_string_buffer(ctx, name_buffer);
+
+ extern_resources[*count].var = NULL;
+ extern_resources[*count].buffer = NULL;
+
+ extern_resources[*count].name = name;
+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type;
+
+ extern_resources[*count].component_type = component_type;
+
+ extern_resources[*count].regset = regset;
+ extern_resources[*count].id = var->regs[regset].id;
+ extern_resources[*count].space = var->regs[regset].space;
+ extern_resources[*count].index = var->regs[regset].index + regset_offset;
+ extern_resources[*count].bind_count = 1;
+ extern_resources[*count].loc = var->loc;
+
+ ++*count;
+ }
+ }
+ else
+ {
+ unsigned int r;
+
+ if (!hlsl_type_is_resource(var->data_type))
+ continue;
+
+ for (r = 0; r <= HLSL_REGSET_LAST; ++r)
+ {
+ if (!var->regs[r].allocated)
+ continue;
+
+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
+ &capacity, *count + 1, sizeof(*extern_resources))))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ return NULL;
+ }
+
+ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name))))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ return NULL;
+ }
+
+ extern_resources[*count].var = var;
+ extern_resources[*count].buffer = NULL;
+
+ extern_resources[*count].name = name;
+ /* For some reason 5.1 resources aren't marked as
+ * user-packed, but cbuffers still are. */
+ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1)
+ && !!var->reg_reservation.reg_type;
+
+ extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0);
+
+ extern_resources[*count].regset = r;
+ extern_resources[*count].id = var->regs[r].id;
+ extern_resources[*count].space = var->regs[r].space;
+ extern_resources[*count].index = var->regs[r].index;
+ extern_resources[*count].bind_count = var->bind_count[r];
+ extern_resources[*count].loc = var->loc;
+
+ ++*count;
+ }
+ }
+ }
+
+ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry)
+ {
+ if (!buffer->reg.allocated)
+ continue;
+
+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
+ &capacity, *count + 1, sizeof(*extern_resources))))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ return NULL;
+ }
+
+ if (!(name = hlsl_strdup(ctx, buffer->name)))
+ {
+ sm4_free_extern_resources(extern_resources, *count);
+ *count = 0;
+ return NULL;
+ }
+
+ extern_resources[*count].var = NULL;
+ extern_resources[*count].buffer = buffer;
+
+ extern_resources[*count].name = name;
+ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type;
+
+ extern_resources[*count].component_type = NULL;
+
+ extern_resources[*count].regset = HLSL_REGSET_NUMERIC;
+ extern_resources[*count].id = buffer->reg.id;
+ extern_resources[*count].space = buffer->reg.space;
+ extern_resources[*count].index = buffer->reg.index;
+ extern_resources[*count].bind_count = 1;
+ extern_resources[*count].loc = buffer->loc;
+
+ ++*count;
+ }
+
+ if (extern_resources)
+ qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources);
+
+ return extern_resources;
+}
+
+static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program)
+{
+ struct extern_resource *extern_resources;
+ unsigned int extern_resources_count;
+
+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
+ for (unsigned int i = 0; i < extern_resources_count; ++i)
+ {
+ if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered)
+ program->features.rovs = true;
+ }
+ sm4_free_extern_resources(extern_resources, extern_resources_count);
+
+ /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE,
+ * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */
+}
+
+static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_function_decl *entry_func)
+{
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ struct extern_resource *extern_resources;
+ unsigned int extern_resources_count, i;
+
+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
+
+ if (version->major == 4)
+ {
+ for (i = 0; i < extern_resources_count; ++i)
+ {
+ const struct extern_resource *resource = &extern_resources[i];
+ const struct hlsl_type *type = resource->component_type;
+
+ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+ {
+ program->global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
+ break;
+ }
+ }
+ }
+
+ sm4_free_extern_resources(extern_resources, extern_resources_count);
+
+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
+ program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
+}
+
+static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_buffer *cbuffer)
+{
+ unsigned int array_first = cbuffer->reg.index;
+ unsigned int array_last = cbuffer->reg.index; /* FIXME: array end. */
+ struct vkd3d_shader_src_param *src_param;
+ struct vkd3d_shader_instruction *ins;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &cbuffer->loc, VKD3DSIH_DCL_CONSTANT_BUFFER, 0, 0)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ ins->declaration.cb.size = cbuffer->size;
+
+ src_param = &ins->declaration.cb.src;
+ vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 0);
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+
+ ins->declaration.cb.range.space = cbuffer->reg.space;
+ ins->declaration.cb.range.first = array_first;
+ ins->declaration.cb.range.last = array_last;
+
+ src_param->reg.idx[0].offset = cbuffer->reg.id;
+ src_param->reg.idx[1].offset = array_first;
+ src_param->reg.idx[2].offset = array_last;
+ src_param->reg.idx_count = 3;
+}
+
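+/* A sketch of the register addressing used above, assuming SM5.1-style
+ * descriptor ranges: idx[0] holds the buffer ID and idx[1]/idx[2] the first
+ * and last register of the bound range, so a hypothetical buffer with id 1
+ * bound at index 4 is declared as { 1, 4, 4 } until array ends are tracked. */
+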
+static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct extern_resource *resource)
+{
+ struct vkd3d_shader_src_param *src_param;
+ struct vkd3d_shader_instruction *ins;
+ unsigned int i;
+
+ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS);
+ VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1);
+
+ for (i = 0; i < resource->bind_count; ++i)
+ {
+ unsigned int array_first = resource->index + i;
+ unsigned int array_last = resource->index + i; /* FIXME: array end. */
+
+ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
+ continue;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, VKD3DSIH_DCL_SAMPLER, 0, 0)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON)
+ ins->flags |= VKD3DSI_SAMPLER_COMPARISON_MODE;
+
+ src_param = &ins->declaration.sampler.src;
+ vsir_src_param_init(src_param, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 0);
+
+ ins->declaration.sampler.range.first = array_first;
+ ins->declaration.sampler.range.last = array_last;
+ ins->declaration.sampler.range.space = resource->space;
+
+ src_param->reg.idx[0].offset = resource->id;
+ src_param->reg.idx[1].offset = array_first;
+ src_param->reg.idx[2].offset = array_last;
+ src_param->reg.idx_count = 3;
+ }
+}
+
+static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type)
+{
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_1D;
+ case HLSL_SAMPLER_DIM_2D:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_2D;
+ case HLSL_SAMPLER_DIM_3D:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_3D;
+ case HLSL_SAMPLER_DIM_CUBE:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_CUBE;
+ case HLSL_SAMPLER_DIM_1DARRAY:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY;
+ case HLSL_SAMPLER_DIM_2DARRAY:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
+ case HLSL_SAMPLER_DIM_2DMS:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_2DMS;
+ case HLSL_SAMPLER_DIM_2DMSARRAY:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY;
+ case HLSL_SAMPLER_DIM_CUBEARRAY:
+ return VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY;
+ case HLSL_SAMPLER_DIM_BUFFER:
+ case HLSL_SAMPLER_DIM_RAW_BUFFER:
+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
+ return VKD3D_SHADER_RESOURCE_BUFFER;
+ default:
+ vkd3d_unreachable();
+ }
+}
+
+static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_type *type)
+{
+ const struct hlsl_type *format = type->e.resource.format;
+
+ switch (format->e.numeric.type)
+ {
+ case HLSL_TYPE_DOUBLE:
+ return VKD3D_DATA_DOUBLE;
+
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_HALF:
+ if (format->modifiers & HLSL_MODIFIER_UNORM)
+ return VKD3D_DATA_UNORM;
+ if (format->modifiers & HLSL_MODIFIER_SNORM)
+ return VKD3D_DATA_SNORM;
+ return VKD3D_DATA_FLOAT;
+
+ case HLSL_TYPE_INT:
+ return VKD3D_DATA_INT;
+
+ case HLSL_TYPE_BOOL:
+ case HLSL_TYPE_UINT:
+ return VKD3D_DATA_UINT;
+ }
+
+ vkd3d_unreachable();
+}
+
+static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct extern_resource *resource,
+ bool uav)
+{
+ enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES;
+ struct vkd3d_shader_structured_resource *structured_resource;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_semantic *semantic;
+ struct vkd3d_shader_instruction *ins;
+ struct hlsl_type *component_type;
+ enum vkd3d_shader_opcode opcode;
+ bool multisampled;
+ unsigned int i, j;
+
+ VKD3D_ASSERT(resource->regset == regset);
+ VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1);
+
+ component_type = resource->component_type;
+
+ for (i = 0; i < resource->bind_count; ++i)
+ {
+ unsigned int array_first = resource->index + i;
+ unsigned int array_last = resource->index + i; /* FIXME: array end. */
+
+ if (resource->var && !resource->var->objects_usage[regset][i].used)
+ continue;
+
+ if (uav)
+ {
+ switch (component_type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
+ opcode = VKD3DSIH_DCL_UAV_STRUCTURED;
+ break;
+ case HLSL_SAMPLER_DIM_RAW_BUFFER:
+ opcode = VKD3DSIH_DCL_UAV_RAW;
+ break;
+ default:
+ opcode = VKD3DSIH_DCL_UAV_TYPED;
+ break;
+ }
+ }
+ else
+ {
+ switch (component_type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_RAW_BUFFER:
+ opcode = VKD3DSIH_DCL_RESOURCE_RAW;
+ break;
+ default:
+ opcode = VKD3DSIH_DCL;
+ break;
+ }
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+ semantic = &ins->declaration.semantic;
+ structured_resource = &ins->declaration.structured_resource;
+ dst_param = &semantic->resource.reg;
+ vsir_dst_param_init(dst_param, uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0);
+
+ if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+ structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC];
+ if (uav && component_type->e.resource.rasteriser_ordered)
+ ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW;
+
+ multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
+ || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY;
+
+ if (!hlsl_version_ge(ctx, 4, 1) && multisampled && !component_type->sample_count)
+ {
+ hlsl_error(ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Multisampled texture object declaration needs sample count for profile %u.%u.",
+ ctx->profile->major_version, ctx->profile->minor_version);
+ }
+
+ for (j = 0; j < 4; ++j)
+ semantic->resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type);
+
+ semantic->resource.range.first = array_first;
+ semantic->resource.range.last = array_last;
+ semantic->resource.range.space = resource->space;
+
+ dst_param->reg.idx[0].offset = resource->id;
+ dst_param->reg.idx[1].offset = array_first;
+ dst_param->reg.idx[2].offset = array_last;
+ dst_param->reg.idx_count = 3;
+
+ ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type);
+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+ ins->raw = true;
+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
+ {
+ ins->structured = true;
+ ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC];
+ }
+
+ if (multisampled)
+ semantic->sample_count = component_type->sample_count;
+ }
+}
+
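+/* For instance, a hypothetical RWStructuredBuffer<float4> takes the
+ * VKD3DSIH_DCL_UAV_STRUCTURED path above with a byte stride of
+ * 4 * reg_size(float4) = 16 bytes, while a raw buffer is declared with
+ * VKD3DSIH_DCL_UAV_RAW and carries no stride. */
+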
+/* OBJECTIVE: Translate all the information from ctx and entry_func to the
+ * vsir_program, so it can be used as input to tpf_compile() without relying
+ * on ctx and entry_func. */
+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
+ uint64_t config_flags, struct vsir_program *program)
+{
+ struct vkd3d_shader_version version = {0};
+ struct extern_resource *extern_resources;
+ unsigned int extern_resources_count;
+ const struct hlsl_buffer *cbuffer;
+
+ version.major = ctx->profile->major_version;
+ version.minor = ctx->profile->minor_version;
+ version.type = ctx->profile->type;
+
+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ generate_vsir_signature(ctx, program, func);
+ if (version.type == VKD3D_SHADER_TYPE_HULL)
+ generate_vsir_signature(ctx, program, ctx->patch_constant_func);
+
+ if (version.type == VKD3D_SHADER_TYPE_COMPUTE)
+ {
+ program->thread_group_size.x = ctx->thread_count[0];
+ program->thread_group_size.y = ctx->thread_count[1];
+ program->thread_group_size.z = ctx->thread_count[2];
+ }
+ else if (version.type == VKD3D_SHADER_TYPE_HULL)
+ {
+ program->input_control_point_count = 1; /* TODO: Obtain from InputPatch */
+ program->output_control_point_count = ctx->output_control_point_count;
+ program->tess_domain = ctx->domain;
+ program->tess_partitioning = ctx->partitioning;
+ program->tess_output_primitive = ctx->output_primitive;
+ }
+ else if (version.type == VKD3D_SHADER_TYPE_DOMAIN)
+ {
+ program->input_control_point_count = 0; /* TODO: Obtain from OutputPatch */
+ program->tess_domain = ctx->domain;
+ }
+
+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
+ {
+ if (cbuffer->reg.allocated)
+ sm4_generate_vsir_add_dcl_constant_buffer(ctx, program, cbuffer);
+ }
+
+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
+ for (unsigned int i = 0; i < extern_resources_count; ++i)
+ {
+ const struct extern_resource *resource = &extern_resources[i];
+
+ if (resource->regset == HLSL_REGSET_SAMPLERS)
+ sm4_generate_vsir_add_dcl_sampler(ctx, program, resource);
+ else if (resource->regset == HLSL_REGSET_TEXTURES)
+ sm4_generate_vsir_add_dcl_texture(ctx, program, resource, false);
+ else if (resource->regset == HLSL_REGSET_UAVS)
+ sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true);
+ }
+ sm4_free_extern_resources(extern_resources, extern_resources_count);
+
+ if (version.type == VKD3D_SHADER_TYPE_HULL)
+ generate_vsir_add_program_instruction(ctx, program,
+ &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0);
+ sm4_generate_vsir_add_function(ctx, func, config_flags, program);
+ if (version.type == VKD3D_SHADER_TYPE_HULL)
+ {
+ generate_vsir_add_program_instruction(ctx, program,
+ &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0);
+ sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program);
+ }
+
+ generate_vsir_scan_required_features(ctx, program);
+ generate_vsir_scan_global_flags(ctx, program, func);
+}
- case HLSL_IR_SWIZZLE:
- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
- replace_instr_with_last_vsir_instr(ctx, program, instr);
- break;
+/* For some reason, for matrices, values from default value initializers end
+ * up in different components than from regular initializers. Default value
+ * initializers fill the matrix in vertical reading order, i.e. column by
+ * column, top-to-bottom within each column, instead of regular reading order,
+ * i.e. row by row, left-to-right within each row, so they have to be
+ * adjusted. An exception is that matrix initializers for function parameters
+ * are filled in regular row-major order. */
+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index)
+{
+ unsigned int element_comp_count, element, x, y, i;
+ unsigned int base = 0;
- default:
- break;
+ switch (type->class)
+ {
+ case HLSL_CLASS_MATRIX:
+ x = index / type->e.numeric.dimy;
+ y = index % type->e.numeric.dimy;
+ return y * type->e.numeric.dimx + x;
+
+ case HLSL_CLASS_ARRAY:
+ element_comp_count = hlsl_type_component_count(type->e.array.type);
+ element = index / element_comp_count;
+ base = element * element_comp_count;
+ return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base);
+
+ case HLSL_CLASS_STRUCT:
+ for (i = 0; i < type->e.record.field_count; ++i)
+ {
+ struct hlsl_type *field_type = type->e.record.fields[i].type;
+
+ element_comp_count = hlsl_type_component_count(field_type);
+ if (index - base < element_comp_count)
+ return base + get_component_index_from_default_initializer_index(field_type, index - base);
+ base += element_comp_count;
+ }
+ break;
+
+ default:
+ return index;
+ }
+
+ vkd3d_unreachable();
+}
+
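+/* As a worked example, for a hypothetical float2x2 (dimx = dimy = 2) the
+ * default-value indices 0, 1, 2, 3 map to components 0, 2, 1, 3; e.g. index 1
+ * gives x = 0, y = 1, hence component y * dimx + x = 2. */
+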
+static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type)
+{
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ return D3D_SRV_DIMENSION_TEXTURE1D;
+ case HLSL_SAMPLER_DIM_2D:
+ return D3D_SRV_DIMENSION_TEXTURE2D;
+ case HLSL_SAMPLER_DIM_3D:
+ return D3D_SRV_DIMENSION_TEXTURE3D;
+ case HLSL_SAMPLER_DIM_CUBE:
+ return D3D_SRV_DIMENSION_TEXTURECUBE;
+ case HLSL_SAMPLER_DIM_1DARRAY:
+ return D3D_SRV_DIMENSION_TEXTURE1DARRAY;
+ case HLSL_SAMPLER_DIM_2DARRAY:
+ return D3D_SRV_DIMENSION_TEXTURE2DARRAY;
+ case HLSL_SAMPLER_DIM_2DMS:
+ return D3D_SRV_DIMENSION_TEXTURE2DMS;
+ case HLSL_SAMPLER_DIM_2DMSARRAY:
+ return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY;
+ case HLSL_SAMPLER_DIM_CUBEARRAY:
+ return D3D_SRV_DIMENSION_TEXTURECUBEARRAY;
+ case HLSL_SAMPLER_DIM_BUFFER:
+ case HLSL_SAMPLER_DIM_RAW_BUFFER:
+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
+ return D3D_SRV_DIMENSION_BUFFER;
+ default:
+ break;
+ }
+
+ vkd3d_unreachable();
+}
+
+static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type)
+{
+ const struct hlsl_type *format = type->e.resource.format;
+
+ switch (format->e.numeric.type)
+ {
+ case HLSL_TYPE_DOUBLE:
+ return D3D_RETURN_TYPE_DOUBLE;
+
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_HALF:
+ if (format->modifiers & HLSL_MODIFIER_UNORM)
+ return D3D_RETURN_TYPE_UNORM;
+ if (format->modifiers & HLSL_MODIFIER_SNORM)
+ return D3D_RETURN_TYPE_SNORM;
+ return D3D_RETURN_TYPE_FLOAT;
+
+ case HLSL_TYPE_INT:
+ return D3D_RETURN_TYPE_SINT;
+
+ case HLSL_TYPE_BOOL:
+ case HLSL_TYPE_UINT:
+ return D3D_RETURN_TYPE_UINT;
+ }
+
+ vkd3d_unreachable();
+}
+
+static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
+{
+ switch (type->class)
+ {
+ case HLSL_CLASS_SAMPLER:
+ return D3D_SIT_SAMPLER;
+ case HLSL_CLASS_TEXTURE:
+ return D3D_SIT_TEXTURE;
+ case HLSL_CLASS_UAV:
+ return D3D_SIT_UAV_RWTYPED;
+ default:
+ break;
+ }
+
+ vkd3d_unreachable();
+}
+
+static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
+{
+ switch (type->class)
+ {
+ case HLSL_CLASS_MATRIX:
+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
+ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
+ return D3D_SVC_MATRIX_COLUMNS;
+ else
+ return D3D_SVC_MATRIX_ROWS;
+ case HLSL_CLASS_SCALAR:
+ return D3D_SVC_SCALAR;
+ case HLSL_CLASS_VECTOR:
+ return D3D_SVC_VECTOR;
+
+ case HLSL_CLASS_ARRAY:
+ case HLSL_CLASS_DEPTH_STENCIL_STATE:
+ case HLSL_CLASS_DEPTH_STENCIL_VIEW:
+ case HLSL_CLASS_EFFECT_GROUP:
+ case HLSL_CLASS_ERROR:
+ case HLSL_CLASS_STRUCT:
+ case HLSL_CLASS_PASS:
+ case HLSL_CLASS_PIXEL_SHADER:
+ case HLSL_CLASS_RASTERIZER_STATE:
+ case HLSL_CLASS_RENDER_TARGET_VIEW:
+ case HLSL_CLASS_SAMPLER:
+ case HLSL_CLASS_STRING:
+ case HLSL_CLASS_TECHNIQUE:
+ case HLSL_CLASS_TEXTURE:
+ case HLSL_CLASS_UAV:
+ case HLSL_CLASS_VERTEX_SHADER:
+ case HLSL_CLASS_VOID:
+ case HLSL_CLASS_CONSTANT_BUFFER:
+ case HLSL_CLASS_COMPUTE_SHADER:
+ case HLSL_CLASS_DOMAIN_SHADER:
+ case HLSL_CLASS_HULL_SHADER:
+ case HLSL_CLASS_GEOMETRY_SHADER:
+ case HLSL_CLASS_BLEND_STATE:
+ case HLSL_CLASS_STREAM_OUTPUT:
+ case HLSL_CLASS_NULL:
+ break;
+ }
+
+ vkd3d_unreachable();
+}
+
+static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
+{
+ switch (type->e.numeric.type)
+ {
+ case HLSL_TYPE_BOOL:
+ return D3D_SVT_BOOL;
+ case HLSL_TYPE_DOUBLE:
+ return D3D_SVT_DOUBLE;
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_HALF:
+ return D3D_SVT_FLOAT;
+ case HLSL_TYPE_INT:
+ return D3D_SVT_INT;
+ case HLSL_TYPE_UINT:
+ return D3D_SVT_UINT;
+ }
+
+ vkd3d_unreachable();
+}
+
+static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type)
+{
+ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
+ const char *name = array_type->name ? array_type->name : "<unnamed>";
+ const struct hlsl_profile_info *profile = ctx->profile;
+ unsigned int array_size = 0;
+ size_t name_offset = 0;
+ size_t i;
+
+ if (type->bytecode_offset)
+ return;
+
+ if (profile->major_version >= 5)
+ name_offset = put_string(buffer, name);
+
+ if (type->class == HLSL_CLASS_ARRAY)
+ array_size = hlsl_get_multiarray_size(type);
+
+ if (array_type->class == HLSL_CLASS_STRUCT)
+ {
+ unsigned int field_count = 0;
+ size_t fields_offset = 0;
+
+ for (i = 0; i < array_type->e.record.field_count; ++i)
+ {
+ struct hlsl_struct_field *field = &array_type->e.record.fields[i];
+
+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
+ continue;
+
+ field->name_bytecode_offset = put_string(buffer, field->name);
+ write_sm4_type(ctx, buffer, field->type);
+ ++field_count;
}
+
+ fields_offset = bytecode_align(buffer);
+
+ for (i = 0; i < array_type->e.record.field_count; ++i)
+ {
+ struct hlsl_struct_field *field = &array_type->e.record.fields[i];
+
+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
+ continue;
+
+ put_u32(buffer, field->name_bytecode_offset);
+ put_u32(buffer, field->type->bytecode_offset);
+ put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float));
+ }
+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID));
+ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type)));
+ put_u32(buffer, vkd3d_make_u32(array_size, field_count));
+ put_u32(buffer, fields_offset);
+ }
+ else
+ {
+ VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC);
+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type)));
+ put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx));
+ put_u32(buffer, vkd3d_make_u32(array_size, 0));
+ put_u32(buffer, 1);
+ }
+
+ if (profile->major_version >= 5)
+ {
+ put_u32(buffer, 0); /* FIXME: unknown */
+ put_u32(buffer, 0); /* FIXME: unknown */
+ put_u32(buffer, 0); /* FIXME: unknown */
+ put_u32(buffer, 0); /* FIXME: unknown */
+ put_u32(buffer, name_offset);
}
}
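+
+/* A sketch of the record written above, assuming the d3dcompiler RDEF layout:
+ * (class | base type << 16), (rows | columns << 16),
+ * (element count | member count << 16), then the member offset, with SM5
+ * profiles appending four unknown uint32s and the name offset. */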
-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
+static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef)
{
- bool is_patch_constant_func = func == ctx->patch_constant_func;
- struct hlsl_block block = {0};
- struct hlsl_scope *scope;
- struct hlsl_ir_var *var;
- uint32_t temp_count;
+ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t);
+ size_t cbuffers_offset, resources_offset, creator_offset, string_offset;
+ unsigned int cbuffer_count = 0, extern_resources_count, i, j;
+ size_t cbuffer_position, resource_position, creator_position;
+ const struct hlsl_profile_info *profile = ctx->profile;
+ struct vkd3d_bytecode_buffer buffer = {0};
+ struct extern_resource *extern_resources;
+ const struct hlsl_buffer *cbuffer;
+ const struct hlsl_ir_var *var;
- compute_liveness(ctx, func);
- mark_indexable_vars(ctx, func);
- temp_count = allocate_temp_registers(ctx, func);
- if (ctx->result)
+ static const uint16_t target_types[] =
+ {
+ 0xffff, /* PIXEL */
+ 0xfffe, /* VERTEX */
+ 0x4753, /* GEOMETRY */
+ 0x4853, /* HULL */
+ 0x4453, /* DOMAIN */
+ 0x4353, /* COMPUTE */
+ };
+
+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
+
+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
+ {
+ if (cbuffer->reg.allocated)
+ ++cbuffer_count;
+ }
+
+ put_u32(&buffer, cbuffer_count);
+ cbuffer_position = put_u32(&buffer, 0);
+ put_u32(&buffer, extern_resources_count);
+ resource_position = put_u32(&buffer, 0);
+ put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version),
+ target_types[profile->type]));
+ put_u32(&buffer, 0); /* FIXME: compilation flags */
+ creator_position = put_u32(&buffer, 0);
+
+ if (profile->major_version >= 5)
+ {
+ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11);
+ put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */
+ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */
+ put_u32(&buffer, binding_desc_size); /* size of binding desc */
+ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */
+ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */
+ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */
+ put_u32(&buffer, 0); /* unknown; possibly a null terminator */
+ }
+
+ /* Bound resources. */
+
+ resources_offset = bytecode_align(&buffer);
+ set_u32(&buffer, resource_position, resources_offset);
+
+ for (i = 0; i < extern_resources_count; ++i)
+ {
+ const struct extern_resource *resource = &extern_resources[i];
+ uint32_t flags = 0;
+
+ if (resource->is_user_packed)
+ flags |= D3D_SIF_USERPACKED;
+
+ put_u32(&buffer, 0); /* name */
+ if (resource->buffer)
+ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER);
+ else
+ put_u32(&buffer, sm4_resource_type(resource->component_type));
+ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS)
+ {
+ unsigned int dimx = resource->component_type->e.resource.format->e.numeric.dimx;
+
+ put_u32(&buffer, sm4_data_type(resource->component_type));
+ put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type));
+ put_u32(&buffer, ~0u); /* FIXME: multisample count */
+ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT;
+ }
+ else
+ {
+ put_u32(&buffer, 0);
+ put_u32(&buffer, 0);
+ put_u32(&buffer, 0);
+ }
+ put_u32(&buffer, resource->index);
+ put_u32(&buffer, resource->bind_count);
+ put_u32(&buffer, flags);
+
+ if (hlsl_version_ge(ctx, 5, 1))
+ {
+ put_u32(&buffer, resource->space);
+ put_u32(&buffer, resource->id);
+ }
+ }
+
+ for (i = 0; i < extern_resources_count; ++i)
+ {
+ const struct extern_resource *resource = &extern_resources[i];
+
+ string_offset = put_string(&buffer, resource->name);
+ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset);
+ }
+
+ /* Buffers. */
+
+ cbuffers_offset = bytecode_align(&buffer);
+ set_u32(&buffer, cbuffer_position, cbuffers_offset);
+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
+ {
+ unsigned int var_count = 0;
+
+ if (!cbuffer->reg.allocated)
+ continue;
+
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC])
+ ++var_count;
+ }
+
+ put_u32(&buffer, 0); /* name */
+ put_u32(&buffer, var_count);
+ put_u32(&buffer, 0); /* variable offset */
+ put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float));
+ put_u32(&buffer, 0); /* FIXME: flags */
+ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER);
+ }
+
+ i = 0;
+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
+ {
+ if (!cbuffer->reg.allocated)
+ continue;
+
+ string_offset = put_string(&buffer, cbuffer->name);
+ set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset);
+ }
+
+ i = 0;
+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
+ {
+ size_t vars_start = bytecode_align(&buffer);
+
+ if (!cbuffer->reg.allocated)
+ continue;
+
+ set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start);
+
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ uint32_t flags = 0;
+
+ if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC])
+ continue;
+
+ if (var->is_read)
+ flags |= D3D_SVF_USED;
+
+ put_u32(&buffer, 0); /* name */
+ put_u32(&buffer, var->buffer_offset * sizeof(float));
+ put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float));
+ put_u32(&buffer, flags);
+ put_u32(&buffer, 0); /* type */
+ put_u32(&buffer, 0); /* default value */
+
+ if (profile->major_version >= 5)
+ {
+ put_u32(&buffer, 0); /* texture start */
+ put_u32(&buffer, 0); /* texture count */
+ put_u32(&buffer, 0); /* sampler start */
+ put_u32(&buffer, 0); /* sampler count */
+ }
+ }
+
+ j = 0;
+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6);
+ size_t var_offset = vars_start + j * var_size * sizeof(uint32_t);
+
+ if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC])
+ continue;
+
+ string_offset = put_string(&buffer, var->name);
+ set_u32(&buffer, var_offset, string_offset);
+ write_sm4_type(ctx, &buffer, var->data_type);
+ set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset);
+
+ if (var->default_values)
+ {
+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
+ unsigned int comp_count = hlsl_type_component_count(var->data_type);
+ unsigned int default_value_offset;
+ unsigned int k;
+
+ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t));
+ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset);
+
+ for (k = 0; k < comp_count; ++k)
+ {
+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
+ unsigned int comp_offset, comp_index;
+ enum hlsl_regset regset;
+
+ if (comp_type->class == HLSL_CLASS_STRING)
+ {
+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Cannot write string default value.");
+ continue;
+ }
+
+ comp_index = get_component_index_from_default_initializer_index(var->data_type, k);
+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, &regset);
+ if (regset == HLSL_REGSET_NUMERIC)
+ {
+ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE)
+ hlsl_fixme(ctx, &var->loc, "Write double default values.");
+
+ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t),
+ var->default_values[k].number.u);
+ }
+ }
+ }
+
+ ++j;
+ }
+ }
+
+ creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL));
+ set_u32(&buffer, creator_position, creator_offset);
+
+ sm4_free_extern_resources(extern_resources, extern_resources_count);
+
+ if (buffer.status)
+ {
+ vkd3d_free(buffer.data);
+ ctx->result = buffer.status;
return;
- program->temp_count = max(program->temp_count, temp_count);
+ }
+ rdef->code = buffer.data;
+ rdef->size = buffer.size;
+}
+
+static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
+ bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_node *const_node, *store;
+
+ if (!(const_node = hlsl_new_bool_constant(ctx, val, loc)))
+ return false;
+ hlsl_block_add_instr(block, const_node);
+
+ if (!(store = hlsl_new_simple_store(ctx, var, const_node)))
+ return false;
+ hlsl_block_add_instr(block, store);
- hlsl_block_init(&block);
+ return true;
+}
- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued);
+
+static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+ struct hlsl_ir_jump *jump;
+ struct hlsl_ir_var *var;
+ struct hlsl_block draft;
+ struct hlsl_ir_if *iff;
+
+ if (node->type == HLSL_IR_IF)
{
- if ((var->is_input_semantic && var->last_read)
- || (var->is_output_semantic && var->first_write))
- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc);
+ iff = hlsl_ir_if(node);
+ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued))
+ return true;
+ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued))
+ return true;
+ return false;
}
- if (temp_count)
- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc);
-
- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
+ if (node->type == HLSL_IR_JUMP)
{
- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
- {
- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
- continue;
- if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
- continue;
+ jump = hlsl_ir_jump(node);
+ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK)
+ return false;
- if (var->indexable)
- {
- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
+ hlsl_block_init(&draft);
- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc);
- }
- }
- }
+ if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
+ var = loop_continued;
+ else
+ var = loop_broken;
- list_move_head(&func->body.instrs, &block.instrs);
+ if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc))
+ return false;
- hlsl_block_cleanup(&block);
+ list_move_before(&jump->node.entry, &draft.instrs);
+ list_remove(&jump->node.entry);
+ hlsl_free_instr(&jump->node);
- sm4_generate_vsir_block(ctx, &func->body, program);
+ return true;
+ }
+
+ return false;
}
-/* OBJECTIVE: Translate all the information from ctx and entry_func to the
- * vsir_program, so it can be used as input to tpf_compile() without relying
- * on ctx and entry_func. */
-static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
- uint64_t config_flags, struct vsir_program *program)
+static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx,
+ struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc)
{
- struct vkd3d_shader_version version = {0};
+ struct hlsl_ir_node *cond, *iff;
+ struct hlsl_block then_block;
+ struct hlsl_ir_load *load;
- version.major = ctx->profile->major_version;
- version.minor = ctx->profile->minor_version;
- version.type = ctx->profile->type;
+ hlsl_block_init(&then_block);
- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
- {
- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
- return;
- }
+ if (!(load = hlsl_new_var_load(ctx, var, loc)))
+ return NULL;
+ hlsl_block_add_instr(dst, &load->node);
- generate_vsir_signature(ctx, program, func);
- if (version.type == VKD3D_SHADER_TYPE_HULL)
- generate_vsir_signature(ctx, program, ctx->patch_constant_func);
+ if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc)))
+ return NULL;
+ hlsl_block_add_instr(dst, cond);
- if (version.type == VKD3D_SHADER_TYPE_COMPUTE)
- {
- program->thread_group_size.x = ctx->thread_count[0];
- program->thread_group_size.y = ctx->thread_count[1];
- program->thread_group_size.z = ctx->thread_count[2];
- }
+ if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
+ return NULL;
+ hlsl_block_add_instr(dst, iff);
- sm4_generate_vsir_add_function(ctx, func, config_flags, program);
- if (version.type == VKD3D_SHADER_TYPE_HULL)
- sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program);
+ return hlsl_ir_if(iff);
}
-static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point,
- struct hlsl_block **found_block)
+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
{
- struct hlsl_ir_node *node;
+ struct hlsl_ir_node *node, *next;
- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
+ LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry)
{
- if (node == stop_point)
- return NULL;
+ struct hlsl_ir_if *broken_check, *continued_check;
+ struct hlsl_block draft;
- if (node->type == HLSL_IR_IF)
- {
- struct hlsl_ir_if *iff = hlsl_ir_if(node);
- struct hlsl_ir_jump *jump = NULL;
+ if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued))
+ continue;
- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block)))
- return jump;
- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block)))
- return jump;
- }
- else if (node->type == HLSL_IR_JUMP)
- {
- struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
+ if (&next->entry == &block->instrs)
+ return true;
- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE)
- {
- *found_block = block;
- return jump;
- }
- }
+ hlsl_block_init(&draft);
+
+ broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc);
+ continued_check = loop_unrolling_generate_var_check(ctx,
+ &broken_check->then_block, loop_continued, &next->loc);
+
+ list_move_before(&next->entry, &draft.instrs);
+
+ list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs));
+
+ return true;
}
- return NULL;
+ return false;
+}
+
+static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+ while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued));
}
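+
+/* A sketch of the rewrite performed above on hypothetical HLSL, with "broken"
+ * and "continued" being the synthetic variables:
+ *
+ *     if (c) break;
+ *     f();
+ *
+ * becomes
+ *
+ *     if (c) broken = true;
+ *     if (!broken)
+ *     {
+ *         if (!continued)
+ *             f();
+ *     }
+ */
+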
static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop)
@@ -8696,7 +11514,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru
return loop->unroll_limit;
/* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */
- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
+ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL)
return 1024;
/* SM4 limits implicit unrolling to 254 iterations. */
@@ -8707,167 +11525,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru
return 1024;
}
-static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block,
- struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop)
+static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct copy_propagation_state *state, unsigned int *index)
{
- unsigned int max_iterations, i;
+ size_t scopes_depth = state->scope_count - 1;
+ unsigned int current_index;
+ bool progress;
+
+ do
+ {
+ state->stopped = false;
+ for (size_t i = state->scope_count; scopes_depth < i; --i)
+ copy_propagation_pop_scope(state);
+ copy_propagation_push_scope(state, ctx);
+
+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL);
+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL);
+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL);
+
+ current_index = index_instructions(block, *index);
+ progress |= copy_propagation_transform_block(ctx, block, state);
+
+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL);
+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL);
+ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL);
+ } while (progress);
+
+ *index = current_index;
+}
+
+static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var)
+{
+ struct copy_propagation_value *v;
+
+ if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX))
+ || v->node->type != HLSL_IR_CONSTANT)
+ return false;
+
+ return hlsl_ir_constant(v->node)->value.u[0].u;
+}
+
+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
+{
+ struct hlsl_block draft, tmp_dst, loop_body;
+ struct hlsl_ir_var *broken, *continued;
+ unsigned int max_iterations, i, index;
+ struct copy_propagation_state state;
+ struct hlsl_ir_if *target_if;
+
+ if (!(broken = hlsl_new_synthetic_var(ctx, "broken",
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
+ goto fail;
+
+ if (!(continued = hlsl_new_synthetic_var(ctx, "continued",
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
+ goto fail;
+
+ hlsl_block_init(&draft);
+ hlsl_block_init(&tmp_dst);
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
+ copy_propagation_state_init(&state, ctx);
+ index = 2;
+ state.stop = &loop->node;
+ loop_unrolling_simplify(ctx, block, &state, &index);
+ state.stopped = false;
+ index = loop->node.index;
+
+ if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc))
+ goto fail;
+ hlsl_block_add_block(&draft, &tmp_dst);
+
+ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
+ goto fail;
+ hlsl_block_add_block(&draft, &tmp_dst);
+
+ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
+ goto fail;
+ state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry);
+ hlsl_block_add_block(&draft, &tmp_dst);
+
+ copy_propagation_push_scope(&state, ctx);
+ loop_unrolling_simplify(ctx, &draft, &state, &index);
+
+ /* As an optimization, we only remove jumps from the loop's body once. */
+ if (!hlsl_clone_block(ctx, &loop_body, &loop->body))
+ goto fail;
+ loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued);
for (i = 0; i < max_iterations; ++i)
{
- struct hlsl_block tmp_dst, *jump_block;
- struct hlsl_ir_jump *jump = NULL;
+ copy_propagation_push_scope(&state, ctx);
- if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body))
- return false;
- list_move_before(&loop->node.entry, &tmp_dst.instrs);
- hlsl_block_cleanup(&tmp_dst);
+ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
+ goto fail;
+ hlsl_block_add_block(&target_if->then_block, &tmp_dst);
- hlsl_run_const_passes(ctx, block);
+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body))
+ goto fail;
+ hlsl_block_add_block(&target_if->then_block, &tmp_dst);
- if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block)))
- {
- enum hlsl_ir_jump_type type = jump->type;
+ loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index);
- if (jump_block != loop_parent)
- {
- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
- hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
- "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported.");
- return false;
- }
+ if (loop_unrolling_check_val(&state, broken))
+ break;
- list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry));
- hlsl_block_cleanup(&tmp_dst);
+ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
+ goto fail;
+ hlsl_block_add_block(&draft, &tmp_dst);
- if (type == HLSL_IR_JUMP_BREAK)
- break;
- }
- }
+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter))
+ goto fail;
+ hlsl_block_add_block(&target_if->then_block, &tmp_dst);
+ }
 /* Native will not emit an error if max_iterations has been reached with an
 * explicit limit. It also will not insert a loop if there are iterations left,
 * e.g. [unroll(4)] for (i = 0; i < 8; ++i). */
if (!loop->unroll_limit && i == max_iterations)
{
- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
+ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL)
hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
"Unable to unroll loop, maximum iterations reached (%u).", max_iterations);
- return false;
+ goto fail;
}
+ hlsl_block_cleanup(&loop_body);
+ copy_propagation_state_destroy(&state);
+
+ list_move_before(&loop->node.entry, &draft.instrs);
+ hlsl_block_cleanup(&draft);
list_remove(&loop->node.entry);
hlsl_free_instr(&loop->node);
return true;
+
+fail:
+ hlsl_block_cleanup(&loop_body);
+ copy_propagation_state_destroy(&state);
+ hlsl_block_cleanup(&draft);
+
+ return false;
}
-/*
- * loop_unrolling_find_unrollable_loop() is not the normal way to do things;
- * normal passes simply iterate over the whole block and apply a transformation
- * to every relevant instruction. However, loop unrolling can fail, and we want
- * to leave the loop in its previous state in that case. That isn't a problem by
- * itself, except that loop unrolling needs copy-prop in order to work properly,
- * and copy-prop state at the time of the loop depends on the rest of the program
- * up to that point. This means we need to clone the whole program, and at that
- * point we have to search it again anyway to find the clone of the loop we were
- * going to unroll.
- *
- * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop
- * up until the loop instruction, clone just that loop, then use copyprop again
- * with the saved state after unrolling. However, copyprop currently isn't built
- * for that yet [notably, it still relies on indices]. Note also this still doesn't
- * really let us use transform_ir() anyway [since we don't have a good way to say
- * "copyprop from the beginning of the program up to the instruction we're
- * currently processing" from the callback]; we'd have to use a dedicated
- * recursive function instead. */
-static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block,
- struct hlsl_block **containing_block)
+static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
{
- struct hlsl_ir_node *instr;
+ struct hlsl_block *program = context;
+ struct hlsl_ir_loop *loop;
- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
+ if (node->type != HLSL_IR_LOOP)
+ return true;
+
+ loop = hlsl_ir_loop(node);
+
+ if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL)
+ return true;
+
+ if (!loop_unrolling_unroll_loop(ctx, program, loop))
+ loop->unroll_type = HLSL_LOOP_FORCE_LOOP;
+
+ return true;
+}
+
+/* We could handle this at parse time. However, loop unrolling often needs to
+ * know the value of variables modified in the "iter" block. It is possible to
+ * detect that all exit paths of a loop body modify such variables in the same
+ * way, but doing so is difficult, and d3dcompiler does not attempt it.
+ * In fact, d3dcompiler is capable of unrolling the following loop:
+ * for (int i = 0; i < 10; ++i)
+ * {
+ * if (some_uniform > 4)
+ * continue;
+ * }
+ * but cannot unroll the same loop with "++i" moved to each exit path:
+ * for (int i = 0; i < 10;)
+ * {
+ * if (some_uniform > 4)
+ * {
+ * ++i;
+ * continue;
+ * }
+ * ++i;
+ * }
+ */
+static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
+{
+ struct hlsl_ir_loop *loop;
+
+ if (node->type != HLSL_IR_LOOP)
+ return true;
+
+ loop = hlsl_ir_loop(node);
+
+ hlsl_block_add_block(&loop->body, &loop->iter);
+ return true;
+}
+
+static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop)
+{
+ struct hlsl_ir_node *node;
+
+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
{
- switch (instr->type)
+ switch (node->type)
{
case HLSL_IR_LOOP:
{
- struct hlsl_ir_loop *nested_loop;
- struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
-
- if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block)))
- return nested_loop;
-
- if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
- {
- *containing_block = block;
- return loop;
- }
+ struct hlsl_ir_loop *loop = hlsl_ir_loop(node);
+ resolve_continues(ctx, &loop->body, loop);
break;
}
case HLSL_IR_IF:
{
- struct hlsl_ir_loop *loop;
- struct hlsl_ir_if *iff = hlsl_ir_if(instr);
-
- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block)))
- return loop;
- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block)))
- return loop;
-
+ struct hlsl_ir_if *iff = hlsl_ir_if(node);
+ resolve_continues(ctx, &iff->then_block, last_loop);
+ resolve_continues(ctx, &iff->else_block, last_loop);
break;
}
case HLSL_IR_SWITCH:
{
- struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
+ struct hlsl_ir_switch *s = hlsl_ir_switch(node);
struct hlsl_ir_switch_case *c;
- struct hlsl_ir_loop *loop;
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
{
- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block)))
- return loop;
+ resolve_continues(ctx, &c->body, last_loop);
+ }
+
+ break;
+ }
+ case HLSL_IR_JUMP:
+ {
+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
+
+ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
+ break;
+
+ if (last_loop->type == HLSL_LOOP_FOR)
+ {
+ struct hlsl_block draft;
+
+ if (!hlsl_clone_block(ctx, &draft, &last_loop->iter))
+ return;
+
+ list_move_before(&node->entry, &draft.instrs);
+ hlsl_block_cleanup(&draft);
}
+ jump->type = HLSL_IR_JUMP_CONTINUE;
break;
}
default:
break;
}
}
-
- return NULL;
}
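+
+/* For example, a hypothetical "for (i = 0; i < n; ++i) { if (c) continue;
+ * f(); }" gets the iteration block cloned before the continue here and
+ * appended to the body by resolve_loops(), leaving a loop body equivalent to
+ * "if (c) { ++i; continue; } f(); ++i;". */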
-static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block)
+static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
{
- while (true)
- {
- struct hlsl_block clone, *containing_block;
- struct hlsl_ir_loop *loop, *cloned_loop;
-
- if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block)))
- return;
-
- if (!hlsl_clone_block(ctx, &clone, block))
- return;
-
- cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block);
- VKD3D_ASSERT(cloned_loop);
+ bool progress;
- if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop))
- {
- hlsl_block_cleanup(&clone);
- loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP;
- continue;
- }
+ /* These are required by copy propagation, which in turn is required for
+ * unrolling. */
+ do
+ {
+ progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL);
+ progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL);
+ } while (progress);
+ hlsl_transform_ir(ctx, split_matrix_copies, block, NULL);
- hlsl_block_cleanup(block);
- hlsl_block_init(block);
- hlsl_block_add_block(block, &clone);
- }
+ hlsl_transform_ir(ctx, unroll_loops, block, block);
+ resolve_continues(ctx, block, NULL);
+ hlsl_transform_ir(ctx, resolve_loops, block, NULL);
}
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
@@ -9026,6 +11956,95 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru
return true;
}
+static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+ struct hlsl_ir_node *call, *rhs, *store;
+ struct hlsl_ir_function_decl *func;
+ unsigned int component_count;
+ struct hlsl_ir_load *load;
+ struct hlsl_ir_expr *expr;
+ struct hlsl_ir_var *lhs;
+ const char *template;
+ char *body;
+
+ static const char template_sm2[] =
+ "typedef bool%u boolX;\n"
+ "typedef float%u floatX;\n"
+ "boolX isinf(floatX x)\n"
+ "{\n"
+ " floatX v = 1 / x;\n"
+ " v = v * v;\n"
+ " return v <= 0;\n"
+ "}\n";
+
+ static const char template_sm3[] =
+ "typedef bool%u boolX;\n"
+ "typedef float%u floatX;\n"
+ "boolX isinf(floatX x)\n"
+ "{\n"
+ " floatX v = 1 / x;\n"
+ " return v <= 0;\n"
+ "}\n";
+
+ static const char template_sm4[] =
+ "typedef bool%u boolX;\n"
+ "typedef float%u floatX;\n"
+ "boolX isinf(floatX x)\n"
+ "{\n"
+ " return (asuint(x) & 0x7fffffff) == 0x7f800000;\n"
+ "}\n";
+
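+ /* The SM4 template above relies on the IEEE 754 binary32 encoding: masking
+ * off the sign bit and comparing with 0x7f800000 (all-ones exponent, zero
+ * mantissa) matches exactly +INF and -INF. */
+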
+ static const char template_int[] =
+ "typedef bool%u boolX;\n"
+ "typedef float%u floatX;\n"
+ "boolX isinf(floatX x)\n"
+ "{\n"
+ " return false;\n"
+ "}";
+
+ if (node->type != HLSL_IR_EXPR)
+ return false;
+
+ expr = hlsl_ir_expr(node);
+
+ if (expr->op != HLSL_OP1_ISINF)
+ return false;
+
+ rhs = expr->operands[0].node;
+
+ if (hlsl_version_lt(ctx, 3, 0))
+ template = template_sm2;
+ else if (hlsl_version_lt(ctx, 4, 0))
+ template = template_sm3;
+ else if (type_is_integer(rhs->data_type))
+ template = template_int;
+ else
+ template = template_sm4;
+
+ component_count = hlsl_type_component_count(rhs->data_type);
+ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
+ return false;
+
+ if (!(func = hlsl_compile_internal_function(ctx, "isinf", body)))
+ return false;
+
+ lhs = func->parameters.vars[0];
+
+ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs)))
+ return false;
+ hlsl_block_add_instr(block, store);
+
+ if (!(call = hlsl_new_call(ctx, func, &node->loc)))
+ return false;
+ hlsl_block_add_instr(block, call);
+
+ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc)))
+ return false;
+ hlsl_block_add_instr(block, &load->node);
+
+ return true;
+}
+
static void process_entry_function(struct hlsl_ctx *ctx,
const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func)
{
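The template_sm4 path above relies on the IEEE 754 bit pattern of infinity: masking off the sign bit and comparing against 0x7f800000 (all exponent bits set, zero mantissa). Restated as host C (an illustrative sketch, not part of the patch):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Equivalent of "(asuint(x) & 0x7fffffff) == 0x7f800000". */
    static int isinf_sm4(float x)
    {
        uint32_t bits;

        memcpy(&bits, &x, sizeof(bits));
        return (bits & 0x7fffffff) == 0x7f800000;
    }

    int main(void)
    {
        /* Prints "1 1 0". */
        printf("%d %d %d\n", isinf_sm4(INFINITY), isinf_sm4(-INFINITY), isinf_sm4(1.0f));
        return 0;
    }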
@@ -9059,6 +12078,8 @@ static void process_entry_function(struct hlsl_ctx *ctx,
lower_ir(ctx, lower_f32tof16, body);
}
+ lower_ir(ctx, lower_isinf, body);
+
lower_return(ctx, entry_func, body, false);
while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
@@ -9107,16 +12128,17 @@ static void process_entry_function(struct hlsl_ctx *ctx,
append_output_var_copy(ctx, entry_func, entry_func->return_var);
}
- if (profile->major_version >= 4)
+ if (hlsl_version_ge(ctx, 4, 0))
{
hlsl_transform_ir(ctx, lower_discard_neg, body, NULL);
}
else
{
hlsl_transform_ir(ctx, lower_discard_nz, body, NULL);
+ hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL);
}
- transform_unroll_loops(ctx, body);
+ loop_unrolling_execute(ctx, body);
hlsl_run_const_passes(ctx, body);
remove_unreachable_code(ctx, body);
@@ -9126,9 +12148,13 @@ static void process_entry_function(struct hlsl_ctx *ctx,
lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_int_dot, body);
+ if (hlsl_version_lt(ctx, 4, 0))
+ hlsl_transform_ir(ctx, lower_separate_samples, body, NULL);
+
hlsl_transform_ir(ctx, validate_dereferences, body, NULL);
hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL);
- if (profile->major_version >= 4)
+
+ if (hlsl_version_ge(ctx, 4, 0))
hlsl_transform_ir(ctx, lower_combined_samples, body, NULL);
do
@@ -9136,7 +12162,10 @@ static void process_entry_function(struct hlsl_ctx *ctx,
while (hlsl_transform_ir(ctx, dce, body, NULL));
hlsl_transform_ir(ctx, track_components_usage, body, NULL);
- sort_synthetic_separated_samplers_first(ctx);
+ if (hlsl_version_lt(ctx, 4, 0))
+ sort_synthetic_combined_samplers_first(ctx);
+ else
+ sort_synthetic_separated_samplers_first(ctx);
if (profile->major_version < 4)
{
@@ -9241,14 +12270,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
mark_indexable_vars(ctx, entry_func);
allocate_temp_registers(ctx, entry_func);
allocate_const_registers(ctx, entry_func);
+ sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS);
+ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
}
else
{
allocate_buffers(ctx);
allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES);
allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS);
+ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
}
- allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
if (TRACE_ON())
rb_for_each_entry(&ctx->functions, dump_function, ctx);
@@ -9265,7 +12296,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
struct vsir_program program;
int result;
- sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab);
+ sm1_generate_ctab(ctx, &ctab);
+ if (ctx->result)
+ return ctx->result;
+
+ sm1_generate_vsir(ctx, entry_func, config_flags, &program);
if (ctx->result)
{
vsir_program_cleanup(&program);
@@ -9282,18 +12317,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
case VKD3D_SHADER_TARGET_DXBC_TPF:
{
uint32_t config_flags = vkd3d_shader_init_config_flags();
+ struct vkd3d_shader_code rdef = {0};
struct vsir_program program;
int result;
+ sm4_generate_rdef(ctx, &rdef);
+ if (ctx->result)
+ return ctx->result;
+
sm4_generate_vsir(ctx, entry_func, config_flags, &program);
if (ctx->result)
{
vsir_program_cleanup(&program);
+ vkd3d_shader_free_shader_code(&rdef);
return ctx->result;
}
- result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func);
+ result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context);
vsir_program_cleanup(&program);
+ vkd3d_shader_free_shader_code(&rdef);
return result;
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
index 716adb15f08..e8dd4d62ae2 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
@@ -30,7 +30,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -121,7 +121,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -143,20 +143,20 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
{
unsigned int k;
- uint32_t u;
- int32_t i;
- double d;
- float f;
+ uint32_t u = 0;
+ double d = 0.0;
+ float f = 0.0f;
+ int32_t i = 0;
- if (dst_type->dimx != src->node.data_type->dimx
- || dst_type->dimy != src->node.data_type->dimy)
+ if (dst_type->e.numeric.dimx != src->node.data_type->e.numeric.dimx
+ || dst_type->e.numeric.dimy != src->node.data_type->e.numeric.dimy)
{
FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type),
debug_hlsl_type(ctx, dst_type));
return false;
}
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (src->node.data_type->e.numeric.type)
{
@@ -195,9 +195,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
f = !!src->value.u[k].u;
d = !!src->value.u[k].u;
break;
-
- default:
- vkd3d_unreachable();
}
switch (dst_type->e.numeric.type)
@@ -220,9 +217,8 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
break;
case HLSL_TYPE_BOOL:
- /* Casts to bool should have already been lowered. */
- default:
- vkd3d_unreachable();
+ dst->u[k].u = u ? ~0u : 0u;
+ break;
}
}
return true;
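The bool case added above follows the usual HLSL convention that a boolean component is stored as an all-ones or all-zero 32-bit mask, hence "u ? ~0u : 0u" rather than a plain 0/1. A tiny host-side check of that convention (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t u = 42;
        uint32_t b = u ? ~0u : 0u; /* true is ~0u, not 1 */

        assert(b == 0xffffffffu);
        assert((0u ? ~0u : 0u) == 0u);
        return 0;
    }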
@@ -236,7 +232,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -262,7 +258,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -288,7 +284,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -315,7 +311,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -341,7 +337,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -386,7 +382,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -420,7 +416,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -444,7 +440,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -489,7 +485,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -529,7 +525,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -555,7 +551,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -601,7 +597,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -638,7 +634,7 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -665,7 +661,7 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -692,7 +688,7 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -717,10 +713,10 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx);
+ VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx);
dst->u[0].f = 0.0f;
- for (k = 0; k < src1->node.data_type->dimx; ++k)
+ for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -746,11 +742,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type);
- VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx);
- VKD3D_ASSERT(src3->node.data_type->dimx == 1);
+ VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx);
+ VKD3D_ASSERT(src3->node.data_type->e.numeric.dimx == 1);
dst->u[0].f = src3->value.u[0].f;
- for (k = 0; k < src1->node.data_type->dimx; ++k)
+ for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -777,7 +773,7 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -844,7 +840,7 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co
VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (src1->node.data_type->e.numeric.type)
{
@@ -862,9 +858,6 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co
case HLSL_TYPE_BOOL:
dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u;
break;
-
- default:
- vkd3d_unreachable();
}
dst->u[k].u *= ~0u;
@@ -880,7 +873,7 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (src1->node.data_type->e.numeric.type)
{
@@ -901,9 +894,6 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
case HLSL_TYPE_BOOL:
dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u;
break;
-
- default:
- vkd3d_unreachable();
}
dst->u[k].u *= ~0u;
@@ -919,7 +909,7 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con
VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (src1->node.data_type->e.numeric.type)
{
@@ -940,9 +930,6 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con
case HLSL_TYPE_BOOL:
dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u;
break;
-
- default:
- vkd3d_unreachable();
}
dst->u[k].u *= ~0u;
@@ -958,16 +945,13 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
unsigned int shift = src2->value.u[k].u % 32;
switch (src1->node.data_type->e.numeric.type)
{
case HLSL_TYPE_INT:
- dst->u[k].i = src1->value.u[k].i << shift;
- break;
-
case HLSL_TYPE_UINT:
dst->u[k].u = src1->value.u[k].u << shift;
break;
@@ -989,7 +973,7 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -1027,7 +1011,7 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -1066,7 +1050,7 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -1108,7 +1092,7 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (type)
{
@@ -1142,7 +1126,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
switch (src1->node.data_type->e.numeric.type)
{
@@ -1160,9 +1144,6 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
case HLSL_TYPE_BOOL:
dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u;
break;
-
- default:
- vkd3d_unreachable();
}
dst->u[k].u *= ~0u;
@@ -1179,7 +1160,7 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type);
VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k];
return true;
@@ -1193,7 +1174,7 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c
VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type);
VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT);
- for (k = 0; k < dst_type->dimx; ++k)
+ for (k = 0; k < dst_type->e.numeric.dimx; ++k)
{
unsigned int shift = src2->value.u[k].u % 32;
@@ -1401,7 +1382,7 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg)
struct hlsl_type *data_type = const_arg->node.data_type;
unsigned int k;
- for (k = 0; k < data_type->dimx; ++k)
+ for (k = 0; k < data_type->e.numeric.dimx; ++k)
{
switch (data_type->e.numeric.type)
{
@@ -1422,9 +1403,6 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg)
if (const_arg->value.u[k].u != 0)
return false;
break;
-
- default:
- return false;
}
}
return true;
@@ -1435,7 +1413,7 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg)
struct hlsl_type *data_type = const_arg->node.data_type;
unsigned int k;
- for (k = 0; k < data_type->dimx; ++k)
+ for (k = 0; k < data_type->e.numeric.dimx; ++k)
{
switch (data_type->e.numeric.type)
{
@@ -1460,9 +1438,6 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg)
if (const_arg->value.u[k].u != ~0)
return false;
break;
-
- default:
- return false;
}
}
return true;
@@ -1544,6 +1519,250 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
return false;
}
+static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type)
+{
+ switch (op)
+ {
+ case HLSL_OP2_ADD:
+ case HLSL_OP2_MUL:
+ return type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT;
+
+ case HLSL_OP2_BIT_AND:
+ case HLSL_OP2_BIT_OR:
+ case HLSL_OP2_BIT_XOR:
+ case HLSL_OP2_LOGIC_AND:
+ case HLSL_OP2_LOGIC_OR:
+ case HLSL_OP2_MAX:
+ case HLSL_OP2_MIN:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static bool is_op_commutative(enum hlsl_ir_expr_op op)
+{
+ switch (op)
+ {
+ case HLSL_OP2_ADD:
+ case HLSL_OP2_BIT_AND:
+ case HLSL_OP2_BIT_OR:
+ case HLSL_OP2_BIT_XOR:
+ case HLSL_OP2_DOT:
+ case HLSL_OP2_LOGIC_AND:
+ case HLSL_OP2_LOGIC_OR:
+ case HLSL_OP2_MAX:
+ case HLSL_OP2_MIN:
+ case HLSL_OP2_MUL:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Returns true iff x OPL (y OPR z) = (x OPL y) OPR (x OPL z). */
+static bool is_op_left_distributive(enum hlsl_ir_expr_op opl, enum hlsl_ir_expr_op opr, enum hlsl_base_type type)
+{
+ switch (opl)
+ {
+ case HLSL_OP2_BIT_AND:
+ return opr == HLSL_OP2_BIT_OR || opr == HLSL_OP2_BIT_XOR;
+
+ case HLSL_OP2_BIT_OR:
+ return opr == HLSL_OP2_BIT_AND;
+
+ case HLSL_OP2_DOT:
+ case HLSL_OP2_MUL:
+ return opr == HLSL_OP2_ADD && (type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT);
+
+ case HLSL_OP2_MAX:
+ return opr == HLSL_OP2_MIN;
+
+ case HLSL_OP2_MIN:
+ return opr == HLSL_OP2_MAX;
+
+ default:
+ return false;
+ }
+}
+
+/* Attempt to collect together the expression (x OPL a) OPR (x OPL b) -> x OPL (a OPR b). */
+static struct hlsl_ir_node *collect_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+ enum hlsl_ir_expr_op opr, struct hlsl_ir_node *node1, struct hlsl_ir_node *node2)
+{
+ enum hlsl_base_type type = instr->data_type->e.numeric.type;
+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
+ struct hlsl_ir_node *ab, *res;
+ struct hlsl_ir_expr *e1, *e2;
+ enum hlsl_ir_expr_op opl;
+
+ if (!node1 || !node2 || node1->type != HLSL_IR_EXPR || node2->type != HLSL_IR_EXPR)
+ return NULL;
+ e1 = hlsl_ir_expr(node1);
+ e2 = hlsl_ir_expr(node2);
+ opl = e1->op;
+
+ if (e2->op != opl || !is_op_left_distributive(opl, opr, type))
+ return NULL;
+ if (e1->operands[0].node != e2->operands[0].node)
+ return NULL;
+ if (e1->operands[1].node->type != HLSL_IR_CONSTANT || e2->operands[1].node->type != HLSL_IR_CONSTANT)
+ return NULL;
+
+ if (!(ab = hlsl_new_binary_expr(ctx, opr, e1->operands[1].node, e2->operands[1].node)))
+ return NULL;
+ list_add_before(&instr->entry, &ab->entry);
+
+ operands[0] = e1->operands[0].node;
+ operands[1] = ab;
+
+ if (!(res = hlsl_new_expr(ctx, opl, operands, instr->data_type, &instr->loc)))
+ return NULL;
+ list_add_before(&instr->entry, &res->entry);
+ return res;
+}
+
+bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+ struct hlsl_ir_node *arg1, *arg2, *tmp;
+ struct hlsl_ir_expr *expr;
+ enum hlsl_base_type type;
+ enum hlsl_ir_expr_op op;
+ bool progress = false;
+
+ if (instr->type != HLSL_IR_EXPR)
+ return false;
+ expr = hlsl_ir_expr(instr);
+
+ if (instr->data_type->class > HLSL_CLASS_VECTOR)
+ return false;
+
+ arg1 = expr->operands[0].node;
+ arg2 = expr->operands[1].node;
+ type = instr->data_type->e.numeric.type;
+ op = expr->op;
+
+ if (!arg1 || !arg2)
+ return false;
+
+ if ((tmp = collect_exprs(ctx, instr, op, arg1, arg2)))
+ {
+ /* (x OPL a) OPR (x OPL b) -> x OPL (a OPR b) */
+ hlsl_replace_node(instr, tmp);
+ return true;
+ }
+
+ if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT)
+ {
+ /* a OP x -> x OP a */
+ tmp = arg1;
+ arg1 = arg2;
+ arg2 = tmp;
+ progress = true;
+ }
+
+ if (is_op_associative(op, type))
+ {
+ struct hlsl_ir_expr *e1 = arg1->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg1) : NULL;
+ struct hlsl_ir_expr *e2 = arg2->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg2) : NULL;
+
+ if (e1 && e1->op == op && e1->operands[0].node->type != HLSL_IR_CONSTANT
+ && e1->operands[1].node->type == HLSL_IR_CONSTANT)
+ {
+ if (arg2->type == HLSL_IR_CONSTANT)
+ {
+ /* (x OP a) OP b -> x OP (a OP b) */
+ struct hlsl_ir_node *ab;
+
+ if (!(ab = hlsl_new_binary_expr(ctx, op, e1->operands[1].node, arg2)))
+ return false;
+ list_add_before(&instr->entry, &ab->entry);
+
+ arg1 = e1->operands[0].node;
+ arg2 = ab;
+ progress = true;
+ }
+ else if (is_op_commutative(op))
+ {
+ /* (x OP a) OP y -> (x OP y) OP a */
+ struct hlsl_ir_node *xy;
+
+ if (!(xy = hlsl_new_binary_expr(ctx, op, e1->operands[0].node, arg2)))
+ return false;
+ list_add_before(&instr->entry, &xy->entry);
+
+ arg1 = xy;
+ arg2 = e1->operands[1].node;
+ progress = true;
+ }
+ }
+
+ if (!progress && arg1->type != HLSL_IR_CONSTANT && e2 && e2->op == op
+ && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT)
+ {
+ /* x OP (y OP a) -> (x OP y) OP a */
+ struct hlsl_ir_node *xy;
+
+ if (!(xy = hlsl_new_binary_expr(ctx, op, arg1, e2->operands[0].node)))
+ return false;
+ list_add_before(&instr->entry, &xy->entry);
+
+ arg1 = xy;
+ arg2 = e2->operands[1].node;
+ progress = true;
+ }
+
+ if (!progress && e1 && (tmp = collect_exprs(ctx, instr, op, e1->operands[1].node, arg2)))
+ {
+ /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */
+ arg1 = e1->operands[0].node;
+ arg2 = tmp;
+ progress = true;
+ }
+
+ if (!progress && is_op_commutative(op) && e1
+ && (tmp = collect_exprs(ctx, instr, op, e1->operands[0].node, arg2)))
+ {
+ /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */
+ arg1 = tmp;
+ arg2 = e1->operands[1].node;
+ progress = true;
+ }
+
+ if (!progress && e2 && (tmp = collect_exprs(ctx, instr, op, arg1, e2->operands[0].node)))
+ {
+ /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */
+ arg1 = tmp;
+ arg2 = e2->operands[1].node;
+ progress = true;
+ }
+
+ if (!progress && is_op_commutative(op) && e2
+ && (tmp = collect_exprs(ctx, instr, op, arg1, e2->operands[1].node)))
+ {
+ /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */
+ arg1 = tmp;
+ arg2 = e2->operands[0].node;
+ progress = true;
+ }
+ }
+
+ if (progress)
+ {
+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2};
+ struct hlsl_ir_node *res;
+
+ if (!(res = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc)))
+ return false;
+ list_add_before(&instr->entry, &res->entry);
+ hlsl_replace_node(instr, res);
+ }
+
+ return progress;
+}
+
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_constant_value value;
@@ -1559,8 +1778,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
return false;
src = hlsl_ir_constant(swizzle->val.node);
- for (i = 0; i < swizzle->node.data_type->dimx; ++i)
- value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)];
+ for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i)
+ value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)];
if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc)))
return false;
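hlsl_normalize_binary_exprs() added above only applies rewrites that are value-preserving for the operator and type in question; the point is to push constants rightwards and group them so constant folding can collapse them. A small C sketch of the identities involved, for integer add/mul (illustrative only, not part of the patch):

    #include <assert.h>

    int main(void)
    {
        int x = 7;

        assert(2 + x == x + 2);                   /* a OP x -> x OP a */
        assert((x + 2) + 3 == x + (2 + 3));       /* (x OP a) OP b -> x OP (a OP b) */
        assert((x * 2) + (x * 3) == x * (2 + 3)); /* left-distributivity collected */
        return 0;
    }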
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index b0e89bededb..3678ad0bacf 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -1,5 +1,6 @@
/*
* Copyright 2023 Conor McCarthy for CodeWeavers
+ * Copyright 2023-2024 Elizabeth Figura for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -201,6 +202,14 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32
src->reg.u.immconst_u32[0] = value;
}
+static void vsir_src_param_init_io(struct vkd3d_shader_src_param *src,
+ enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count)
+{
+ vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
+ src->reg.dimension = VSIR_DIMENSION_VEC4;
+ src->swizzle = vsir_swizzle_from_writemask(e->mask);
+}
+
void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id)
{
vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1);
@@ -214,6 +223,14 @@ static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_
src->reg.idx[0].offset = idx;
}
+static void src_param_init_parameter_vec4(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type)
+{
+ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1);
+ src->reg.idx[0].offset = idx;
+ src->reg.dimension = VSIR_DIMENSION_VEC4;
+ src->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+}
+
static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx)
{
vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2);
@@ -243,6 +260,14 @@ static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigne
src->reg.idx[0].offset = idx;
}
+static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx)
+{
+ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1);
+ src->reg.idx[0].offset = idx;
+ src->reg.dimension = VSIR_DIMENSION_VEC4;
+ src->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+}
+
static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
{
vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
@@ -278,6 +303,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader
param->shift = 0;
}
+static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type,
+ const struct signature_element *e, unsigned int idx_count)
+{
+ vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
+ dst->reg.dimension = VSIR_DIMENSION_VEC4;
+ dst->write_mask = e->mask;
+}
+
static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
{
vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
@@ -290,6 +323,14 @@ static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigne
dst->reg.idx[0].offset = idx;
}
+static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx)
+{
+ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1);
+ dst->reg.idx[0].offset = idx;
+ dst->reg.dimension = VSIR_DIMENSION_VEC4;
+ dst->write_mask = VKD3DSP_WRITEMASK_ALL;
+}
+
static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
{
vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
@@ -662,7 +703,56 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog
return VKD3D_OK;
}
-static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex)
+static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program,
+ struct vkd3d_shader_instruction *tex, unsigned int *tmp_idx)
+{
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ struct vkd3d_shader_location *location = &tex->location;
+ struct vkd3d_shader_instruction *div_ins, *tex_ins;
+ size_t pos = tex - instructions->elements;
+ unsigned int w_comp;
+
+ w_comp = vsir_swizzle_get_component(tex->src[0].swizzle, 3);
+
+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ if (*tmp_idx == ~0u)
+ *tmp_idx = program->temp_count++;
+
+ div_ins = &instructions->elements[pos + 1];
+ tex_ins = &instructions->elements[pos + 2];
+
+ if (!vsir_instruction_init_with_params(program, div_ins, location, VKD3DSIH_DIV, 1, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ vsir_dst_param_init(&div_ins->dst[0], VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ div_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ div_ins->dst[0].reg.idx[0].offset = *tmp_idx;
+ div_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
+
+ div_ins->src[0] = tex->src[0];
+
+ div_ins->src[1] = tex->src[0];
+ div_ins->src[1].swizzle = vkd3d_shader_create_swizzle(w_comp, w_comp, w_comp, w_comp);
+
+ if (!vsir_instruction_init_with_params(program, tex_ins, location, VKD3DSIH_TEX, 1, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ tex_ins->dst[0] = tex->dst[0];
+
+ tex_ins->src[0].reg = div_ins->dst[0].reg;
+ tex_ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE;
+
+ tex_ins->src[1] = tex->src[1];
+
+ vkd3d_shader_instruction_make_nop(tex);
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program,
+ struct vkd3d_shader_instruction *tex, struct vkd3d_shader_message_context *message_context)
{
unsigned int idx = tex->src[1].reg.idx[0].offset;
struct vkd3d_shader_src_param *srcs;
@@ -670,16 +760,34 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, st
VKD3D_ASSERT(tex->src[1].reg.idx_count == 1);
VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr);
- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3)))
+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 4)))
return VKD3D_ERROR_OUT_OF_MEMORY;
srcs[0] = tex->src[0];
vsir_src_param_init_resource(&srcs[1], idx, idx);
vsir_src_param_init_sampler(&srcs[2], idx, idx);
- tex->opcode = VKD3DSIH_SAMPLE;
- tex->src = srcs;
- tex->src_count = 3;
+ if (!tex->flags)
+ {
+ tex->opcode = VKD3DSIH_SAMPLE;
+ tex->src = srcs;
+ tex->src_count = 3;
+ }
+ else if (tex->flags == VKD3DSI_TEXLD_BIAS)
+ {
+ tex->opcode = VKD3DSIH_SAMPLE_B;
+ tex->src = srcs;
+ tex->src_count = 4;
+
+ srcs[3] = tex->src[0];
+ srcs[3].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W);
+ }
+ else
+ {
+ vkd3d_shader_error(message_context, &tex->location,
+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unhandled tex flags %#x.", tex->flags);
+ return VKD3D_ERROR_NOT_IMPLEMENTED;
+ }
return VKD3D_OK;
}
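vsir_program_lower_texldp() above implements projective texturing: the coordinate is divided by its own w component (via the coord.wwww swizzle) before a plain TEX, while the VKD3DSI_TEXLD_BIAS path instead appends the coordinate's w as the bias operand of SAMPLE_B. A host-side sketch of the projective divide (illustrative only, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
        float coord[4] = {2.0f, 4.0f, 0.0f, 2.0f};
        float w = coord[3];

        /* div r, coord, coord.wwww */
        for (unsigned int i = 0; i < 4; ++i)
            coord[i] /= w;
        printf("projected: %f %f\n", coord[0], coord[1]); /* 1.0 2.0 */
        return 0;
    }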
@@ -709,6 +817,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program,
return VKD3D_OK;
}
+static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program *program,
+ struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx)
+{
+ switch (ins->declaration.dst.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ case VKD3DSPR_OUTPUT:
+ case VKD3DSPR_PATCHCONST:
+ case VKD3DSPR_INCONTROLPOINT:
+ case VKD3DSPR_OUTCONTROLPOINT:
+ break;
+
+ case VKD3DSPR_PRIMID:
+ case VKD3DSPR_FORKINSTID:
+ case VKD3DSPR_JOININSTID:
+ case VKD3DSPR_THREADID:
+ case VKD3DSPR_THREADGROUPID:
+ case VKD3DSPR_LOCALTHREADID:
+ case VKD3DSPR_LOCALTHREADINDEX:
+ case VKD3DSPR_COVERAGE:
+ case VKD3DSPR_TESSCOORD:
+ case VKD3DSPR_OUTPOINTID:
+ case VKD3DSPR_GSINSTID:
+ case VKD3DSPR_WAVELANECOUNT:
+ case VKD3DSPR_WAVELANEINDEX:
+ bitmap_set(program->io_dcls, ins->declaration.dst.reg.type);
+ break;
+
+ default:
+ vkd3d_shader_error(ctx->message_context, &ins->location,
+ VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Internal compiler error: invalid register type %#x for DCL_INPUT.",
+ ins->declaration.dst.reg.type);
+ return VKD3D_ERROR;
+ }
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program,
+ struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx)
+{
+ switch (ins->declaration.dst.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ case VKD3DSPR_OUTPUT:
+ case VKD3DSPR_PATCHCONST:
+ case VKD3DSPR_INCONTROLPOINT:
+ case VKD3DSPR_OUTCONTROLPOINT:
+ break;
+
+ case VKD3DSPR_DEPTHOUT:
+ case VKD3DSPR_SAMPLEMASK:
+ case VKD3DSPR_DEPTHOUTGE:
+ case VKD3DSPR_DEPTHOUTLE:
+ case VKD3DSPR_OUTSTENCILREF:
+ bitmap_set(program->io_dcls, ins->declaration.dst.reg.type);
+ break;
+
+ default:
+ vkd3d_shader_error(ctx->message_context, &ins->location,
+ VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Internal compiler error: invalid register type %#x for DCL_OUTPUT.",
+ ins->declaration.dst.reg.type);
+ return VKD3D_ERROR;
+ }
+
+ return VKD3D_OK;
+}
+
static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program,
struct vsir_transformation_context *ctx)
{
@@ -743,19 +921,47 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr
case VKD3DSIH_DCL_GLOBAL_FLAGS:
case VKD3DSIH_DCL_SAMPLER:
case VKD3DSIH_DCL_TEMPS:
+ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN:
case VKD3DSIH_DCL_THREAD_GROUP:
case VKD3DSIH_DCL_UAV_TYPED:
vkd3d_shader_instruction_make_nop(ins);
break;
+ case VKD3DSIH_DCL_INPUT:
+ vsir_program_lower_dcl_input(program, ins, ctx);
+ vkd3d_shader_instruction_make_nop(ins);
+ break;
+
+ case VKD3DSIH_DCL_OUTPUT:
+ vsir_program_lower_dcl_output(program, ins, ctx);
+ vkd3d_shader_instruction_make_nop(ins);
+ break;
+
+ case VKD3DSIH_DCL_INPUT_SGV:
+ case VKD3DSIH_DCL_INPUT_SIV:
+ case VKD3DSIH_DCL_INPUT_PS:
+ case VKD3DSIH_DCL_INPUT_PS_SGV:
+ case VKD3DSIH_DCL_INPUT_PS_SIV:
+ case VKD3DSIH_DCL_OUTPUT_SIV:
+ vkd3d_shader_instruction_make_nop(ins);
+ break;
+
case VKD3DSIH_SINCOS:
if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0)
return ret;
break;
case VKD3DSIH_TEX:
- if ((ret = vsir_program_lower_tex(program, ins)) < 0)
- return ret;
+ if (ins->flags == VKD3DSI_TEXLD_PROJECT)
+ {
+ if ((ret = vsir_program_lower_texldp(program, ins, &tmp_idx)) < 0)
+ return ret;
+ }
+ else
+ {
+ if ((ret = vsir_program_lower_tex(program, ins, message_context)) < 0)
+ return ret;
+ }
break;
case VKD3DSIH_TEXLDD:
@@ -847,11 +1053,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program,
return VKD3D_OK;
}
+static bool add_signature_element(struct shader_signature *signature, const char *semantic_name,
+ uint32_t semantic_index, uint32_t mask, uint32_t register_index,
+ enum vkd3d_shader_interpolation_mode interpolation_mode)
+{
+ struct signature_element *new_elements, *e;
+
+ if (!(new_elements = vkd3d_realloc(signature->elements,
+ (signature->element_count + 1) * sizeof(*signature->elements))))
+ return false;
+ signature->elements = new_elements;
+ e = &signature->elements[signature->element_count++];
+ memset(e, 0, sizeof(*e));
+ e->semantic_name = vkd3d_strdup(semantic_name);
+ e->semantic_index = semantic_index;
+ e->sysval_semantic = VKD3D_SHADER_SV_NONE;
+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
+ e->register_count = 1;
+ e->mask = mask;
+ e->used_mask = mask;
+ e->register_index = register_index;
+ e->target_location = register_index;
+ e->interpolation_mode = interpolation_mode;
+ return true;
+}
+
static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program,
struct vsir_transformation_context *ctx)
{
struct shader_signature *signature = &program->output_signature;
- struct signature_element *new_elements, *e;
+ struct signature_element *e;
if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX)
return VKD3D_OK;
@@ -864,22 +1095,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr
return VKD3D_OK;
}
- if (!(new_elements = vkd3d_realloc(signature->elements,
- (signature->element_count + 1) * sizeof(*signature->elements))))
+ if (!add_signature_element(signature, "COLOR", 0, VKD3DSP_WRITEMASK_ALL, SM1_COLOR_REGISTER_OFFSET, VKD3DSIM_NONE))
return VKD3D_ERROR_OUT_OF_MEMORY;
- signature->elements = new_elements;
- e = &signature->elements[signature->element_count++];
- memset(e, 0, sizeof(*e));
- e->semantic_name = vkd3d_strdup("COLOR");
- e->sysval_semantic = VKD3D_SHADER_SV_NONE;
- e->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
- e->register_count = 1;
- e->mask = VKD3DSP_WRITEMASK_ALL;
- e->used_mask = VKD3DSP_WRITEMASK_ALL;
- e->register_index = SM1_COLOR_REGISTER_OFFSET;
- e->target_location = SM1_COLOR_REGISTER_OFFSET;
- e->interpolation_mode = VKD3DSIM_NONE;
-
return VKD3D_OK;
}
@@ -975,6 +1192,7 @@ static void remove_unread_output_components(const struct shader_signature *signa
switch (dst->reg.type)
{
case VKD3DSPR_OUTPUT:
+ case VKD3DSPR_TEXCRDOUT:
e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0);
break;
@@ -1034,6 +1252,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
e->target_location = map->input_register_index;
+ TRACE("Mapping signature index %u (mask %#x) to target location %u (mask %#x).\n",
+ i, e->mask, map->input_register_index, map->input_mask);
+
if ((input_mask & e->mask) == input_mask)
{
++subset_varying_count;
@@ -1054,6 +1275,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
}
else
{
+ TRACE("Marking signature index %u (mask %#x) as unused.\n", i, e->mask);
+
e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED;
}
@@ -1213,12 +1436,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal
vkd3d_shader_instruction_make_nop(ins);
return;
}
- else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id(
- &ins->declaration.dst.reg))
- {
- vkd3d_shader_instruction_make_nop(ins);
- return;
- }
if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins))
return;
@@ -1369,25 +1586,15 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param
}
}
-static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e,
- enum vkd3d_shader_register_type reg_type, unsigned int idx_count)
-{
- param->write_mask = e->mask;
- param->modifiers = 0;
- param->shift = 0;
- vsir_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
-}
-
static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser,
const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst,
const struct vkd3d_shader_location *location)
{
struct vkd3d_shader_instruction *ins;
- struct vkd3d_shader_dst_param *param;
const struct signature_element *e;
- unsigned int i, count;
+ unsigned int i, count = 2;
- for (i = 0, count = 1; i < s->element_count; ++i)
+ for (i = 0; i < s->element_count; ++i)
count += !!s->elements[i].used_mask;
if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count))
@@ -1399,7 +1606,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p
ins = &normaliser->instructions.elements[dst];
vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE);
- ins->flags = 1;
+
++ins;
for (i = 0; i < s->element_count; ++i)
@@ -1408,26 +1615,35 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p
if (!e->used_mask)
continue;
- if (e->sysval_semantic != VKD3D_SHADER_SV_NONE)
- {
- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV);
- param = &ins->declaration.register_semantic.reg;
- ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic);
- }
- else
+ vsir_instruction_init(ins, location, VKD3DSIH_MOV);
+ ins->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1);
+ ins->dst_count = 1;
+ ins->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1);
+ ins->src_count = 1;
+
+ if (!ins->dst || !ins->src)
{
- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT);
- param = &ins->declaration.dst;
+ WARN("Failed to allocate dst/src param.\n");
+ return VKD3D_ERROR_OUT_OF_MEMORY;
}
- shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2);
- param->reg.idx[0].offset = input_control_point_count;
- param->reg.idx[1].offset = e->register_index;
- param->write_mask = e->mask;
+ vsir_dst_param_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2);
+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->dst[0].reg.idx[0].offset = 0;
+ ins->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param;
+ ins->dst[0].reg.idx[1].offset = e->register_index;
+
+ vsir_src_param_init_io(&ins->src[0], VKD3DSPR_INPUT, e, 2);
+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[0].reg.idx[0].offset = 0;
+ ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param;
+ ins->src[0].reg.idx[1].offset = e->register_index;
++ins;
}
+ vsir_instruction_init(ins, location, VKD3DSIH_RET);
+
return VKD3D_OK;
}
@@ -1442,7 +1658,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i
enum vkd3d_result ret;
unsigned int i, j;
- VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4);
if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
{
@@ -1545,11 +1761,6 @@ static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *n
return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE;
}
-static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser)
-{
- return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
-}
-
static bool shader_signature_find_element_for_reg(const struct shader_signature *signature,
unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx)
{
@@ -1820,7 +2031,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map
element_count = s->element_count;
if (!(elements = vkd3d_malloc(element_count * sizeof(*elements))))
return false;
- memcpy(elements, s->elements, element_count * sizeof(*elements));
+ if (element_count)
+ memcpy(elements, s->elements, element_count * sizeof(*elements));
for (i = 0; i < element_count; ++i)
elements[i].sort_index = i;
@@ -1920,41 +2132,26 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh
{
VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1);
- /* For a relative-addressed register index, move the id up a slot to separate it from the address,
- * because rel_addr can be replaced with a constant offset in some cases. */
- if (reg->idx[id_idx].rel_addr)
- {
- reg->idx[id_idx + 1].rel_addr = NULL;
- reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset;
- reg->idx[id_idx].offset -= register_index;
- if (id_idx)
- {
- /* idx[id_idx] now contains the array index, which must be moved below the control point id. */
- struct vkd3d_shader_register_index tmp = reg->idx[id_idx];
- reg->idx[id_idx] = reg->idx[id_idx - 1];
- reg->idx[id_idx - 1] = tmp;
- }
- ++id_idx;
- }
- /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where
- * tessellation level registers are merged into an array because they're an array in SPIR-V. */
- else
- {
- ++id_idx;
- memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0]));
- reg->idx[0].rel_addr = NULL;
- reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
- }
+ /* Make room for the array index at the front of the array. */
+ ++id_idx;
+ memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0]));
+
+ /* The array index inherits the register relative address, but is offset
+ * by the signature element register index. */
+ reg->idx[0].rel_addr = reg->idx[id_idx].rel_addr;
+ reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
+ reg->idx[id_idx].rel_addr = NULL;
+
+ /* The signature index offset will be fixed in the caller. */
return id_idx;
}
-static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl,
+static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param,
struct io_normaliser *normaliser)
{
unsigned int id_idx, reg_idx, write_mask, element_idx;
struct vkd3d_shader_register *reg = &dst_param->reg;
- struct vkd3d_shader_dst_param **dcl_params;
const struct shader_signature *signature;
const struct signature_element *e;
@@ -1970,26 +2167,23 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
/* Convert patch constant outputs to the patch constant register type to avoid the need
* to convert compiler symbols when accessed as inputs in a later stage. */
reg->type = VKD3DSPR_PATCHCONST;
- dcl_params = normaliser->pc_dcl_params;
}
else
{
signature = normaliser->output_signature;
- dcl_params = normaliser->output_dcl_params;
}
break;
case VKD3DSPR_PATCHCONST:
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->patch_constant_signature;
- dcl_params = normaliser->pc_dcl_params;
break;
+ case VKD3DSPR_TEXCRDOUT:
case VKD3DSPR_COLOROUT:
reg_idx = reg->idx[0].offset;
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
- dcl_params = normaliser->output_dcl_params;
break;
case VKD3DSPR_INCONTROLPOINT:
@@ -1997,14 +2191,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->input_signature;
reg->type = VKD3DSPR_INPUT;
- dcl_params = normaliser->input_dcl_params;
break;
case VKD3DSPR_ATTROUT:
reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset;
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
- dcl_params = normaliser->output_dcl_params;
break;
case VKD3DSPR_RASTOUT:
@@ -2014,7 +2206,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset;
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
- dcl_params = normaliser->output_dcl_params;
/* Fog and point size are scalar, but fxc/d3dcompiler emits a full
* write mask when writing to them. */
if (reg->idx[0].offset > 0)
@@ -2030,54 +2221,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
vkd3d_unreachable();
e = &signature->elements[element_idx];
- if (is_io_dcl)
- {
- /* Validated in the TPF reader. */
- VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params));
-
- if (dcl_params[element_idx])
- {
- /* Merge split declarations into a single one. */
- dcl_params[element_idx]->write_mask |= dst_param->write_mask;
- /* Turn this into a nop. */
- return false;
- }
- else
- {
- dcl_params[element_idx] = dst_param;
- }
- }
-
- if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT)
- {
- if (is_io_dcl)
- {
- /* Emit an array size for the control points for consistency with inputs. */
- reg->idx[0].offset = normaliser->output_control_point_count;
- }
- else
- {
- /* The control point id param. */
- VKD3D_ASSERT(reg->idx[0].rel_addr);
- }
- id_idx = 1;
- }
-
if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic)))
- {
- if (is_io_dcl)
- {
- /* For control point I/O, idx 0 contains the control point count.
- * Ensure it is moved up to the next slot. */
- reg->idx[id_idx].offset = reg->idx[0].offset;
- reg->idx[0].offset = e->register_count;
- ++id_idx;
- }
- else
- {
- id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
- }
- }
+ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
/* Replace the register index with the signature element index */
reg->idx[id_idx].offset = element_idx;
@@ -2129,6 +2274,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par
case VKD3DSPR_OUTCONTROLPOINT:
reg->type = VKD3DSPR_OUTPUT;
+ if (io_normaliser_is_in_fork_or_join_phase(normaliser))
+ normaliser->use_vocp = true;
/* fall through */
case VKD3DSPR_OUTPUT:
reg_idx = reg->idx[reg->idx_count - 1].offset;
@@ -2136,8 +2283,6 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par
break;
case VKD3DSPR_TEXTURE:
- if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL)
- return;
reg->type = VKD3DSPR_INPUT;
reg_idx = reg->idx[0].offset;
signature = normaliser->input_signature;
@@ -2169,40 +2314,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par
static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins,
struct io_normaliser *normaliser)
{
- struct vkd3d_shader_register *reg;
unsigned int i;
switch (ins->opcode)
{
- case VKD3DSIH_DCL_INPUT:
- if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL)
- {
- reg = &ins->declaration.dst.reg;
-
- if (reg->type == VKD3DSPR_OUTCONTROLPOINT)
- normaliser->use_vocp = true;
-
- /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their
- * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. */
- if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)
- vkd3d_shader_instruction_make_nop(ins);
- else if (reg->type == VKD3DSPR_INCONTROLPOINT)
- reg->type = VKD3DSPR_INPUT;
- }
- /* fall through */
- case VKD3DSIH_DCL_INPUT_PS:
- case VKD3DSIH_DCL_OUTPUT:
- if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser))
- vkd3d_shader_instruction_make_nop(ins);
- break;
- case VKD3DSIH_DCL_INPUT_SGV:
- case VKD3DSIH_DCL_INPUT_SIV:
- case VKD3DSIH_DCL_INPUT_PS_SGV:
- case VKD3DSIH_DCL_INPUT_PS_SIV:
- case VKD3DSIH_DCL_OUTPUT_SIV:
- if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser))
- vkd3d_shader_instruction_make_nop(ins);
- break;
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
@@ -2215,7 +2330,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi
if (vsir_instruction_is_dcl(ins))
break;
for (i = 0; i < ins->dst_count; ++i)
- shader_dst_param_io_normalise(&ins->dst[i], false, normaliser);
+ shader_dst_param_io_normalise(&ins->dst[i], normaliser);
for (i = 0; i < ins->src_count; ++i)
shader_src_param_io_normalise(&ins->src[i], normaliser);
break;
@@ -2275,7 +2390,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
program->instructions = normaliser.instructions;
program->use_vocp = normaliser.use_vocp;
- program->normalisation_level = VSIR_FULLY_NORMALISED_IO;
+ program->normalisation_level = VSIR_NORMALISED_SM6;
return VKD3D_OK;
}
@@ -2299,16 +2414,12 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register *
{
enum vkd3d_shader_register_type type;
enum vkd3d_shader_d3dbc_constant_register set;
- uint32_t offset;
}
regs[] =
{
- {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0},
- {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048},
- {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096},
- {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144},
- {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0},
- {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0},
+ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER},
+ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER},
+ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER},
};
unsigned int i;
@@ -2324,7 +2435,7 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register *
}
*set = regs[i].set;
- *index = regs[i].offset + reg->idx[0].offset;
+ *index = reg->idx[0].offset;
return true;
}
}
@@ -3726,7 +3837,8 @@ static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_
sizeof(*list->structures)))
return VKD3D_ERROR_OUT_OF_MEMORY;
- memcpy(&list->structures[list->count], begin, size * sizeof(*begin));
+ if (size)
+ memcpy(&list->structures[list->count], begin, size * sizeof(*begin));
list->count += size;
@@ -4663,7 +4775,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_
}
}
- qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals);
+ if (cfg->loop_intervals)
+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals);
if (TRACE_ON())
for (i = 0; i < cfg->loop_interval_count; ++i)
@@ -6634,52 +6747,486 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr
return VKD3D_OK;
}
-struct validation_context
+static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
{
- struct vkd3d_shader_message_context *message_context;
- const struct vsir_program *program;
- size_t instruction_idx;
- struct vkd3d_shader_location null_location;
- bool invalid_instruction_idx;
- enum vkd3d_result status;
- bool dcl_temps_found;
- enum vkd3d_shader_opcode phase;
- bool inside_block;
-
- struct validation_context_temp_data
- {
- enum vsir_dimension dimension;
- size_t first_seen;
- } *temps;
+ struct shader_signature *signature = &program->input_signature;
+ uint32_t register_idx = 0;
- struct validation_context_ssa_data
- {
- enum vsir_dimension dimension;
- enum vkd3d_data_type data_type;
- size_t first_seen;
- uint32_t write_mask;
- uint32_t read_mask;
- size_t first_assigned;
- } *ssas;
+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
+ return VKD3D_OK;
- enum vkd3d_shader_opcode *blocks;
- size_t depth;
- size_t blocks_capacity;
-};
+ if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE))
+ return VKD3D_OK;
-static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx,
- enum vkd3d_shader_error error, const char *format, ...)
-{
- struct vkd3d_string_buffer buf;
- va_list args;
+ /* We could check the value and skip this if NONE, but chances are if a
+ * user specifies the fog fragment mode as a parameter, they'll want to
+ * enable it dynamically. Always specifying it (and hence always outputting
+ * it from the VS) avoids an extra VS variant. */
- vkd3d_string_buffer_init(&buf);
+ if (vsir_signature_find_element_by_name(signature, "FOG", 0))
+ return VKD3D_OK;
- va_start(args, format);
- vkd3d_string_buffer_vprintf(&buf, format, args);
- va_end(args);
+ for (unsigned int i = 0; i < signature->element_count; ++i)
+ register_idx = max(register_idx, signature->elements[i].register_index + 1);
- if (ctx->invalid_instruction_idx)
+ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ return VKD3D_OK;
+}
+
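The three fog modes handled below reduce to simple per-fragment factors, where d is the interpolated vFOG.x input and FOG_SCALE/FOG_END are shader parameters; assuming the vsir EXP opcode is the base-2 exponential, they can be restated in host C as (illustrative sketch only):

    #include <math.h>
    #include <stdio.h>

    static float saturate(float x)
    {
        return fminf(fmaxf(x, 0.0f), 1.0f);
    }

    int main(void)
    {
        float d = 5.0f, scale = 0.1f, end = 10.0f;
        float t = scale * d;

        printf("linear: %f\n", saturate((end - d) * scale)); /* add + mul_sat */
        printf("exp:    %f\n", saturate(exp2f(-t)));         /* mul + exp_sat */
        printf("exp2:   %f\n", saturate(exp2f(-t * t)));     /* mul, mul, exp_sat */
        return 0;
    }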
+static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program,
+ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode,
+ uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp,
+ size_t *ret_pos, struct vkd3d_shader_message_context *message_context)
+{
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ struct vkd3d_shader_location loc = ret->location;
+ uint32_t ssa_factor = program->ssa_count++;
+ size_t pos = ret - instructions->elements;
+ struct vkd3d_shader_instruction *ins;
+ uint32_t ssa_temp, ssa_temp2;
+
+ switch (mode)
+ {
+ case VKD3D_SHADER_FOG_FRAGMENT_LINEAR:
+ /* We generate the following code:
+ *
+ * add sr0, FOG_END, -vFOG.x
+ * mul_sat srFACTOR, sr0, FOG_SCALE
+ */
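+ /* That is, factor = saturate((FOG_END - vFOG.x) * FOG_SCALE). Assuming
+ * FOG_SCALE is supplied as 1 / (FOG_END - FOG_START), as with
+ * traditional D3D linear fog, this is the usual
+ * (end - d) / (end - start) ramp. */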
+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ *ret_pos = pos + 4;
+
+ ssa_temp = program->ssa_count++;
+
+ ins = &program->instructions.elements[pos];
+
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_ADD, 1, 2);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp);
+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VKD3D_DATA_FLOAT);
+ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1);
+ ins->src[1].reg.idx[0].offset = fog_signature_idx;
+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+ ins->src[1].modifiers = VKD3DSPSM_NEG;
+
+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_factor);
+ ins->dst[0].modifiers = VKD3DSPDM_SATURATE;
+ src_param_init_ssa_float(&ins->src[0], ssa_temp);
+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT);
+ break;
+
+ case VKD3D_SHADER_FOG_FRAGMENT_EXP:
+ /* We generate the following code:
+ *
+ * mul sr0, FOG_SCALE, vFOG.x
+ * exp_sat srFACTOR, -sr0
+ */
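+ /* That is, factor = saturate(exp2(-FOG_SCALE * vFOG.x)). EXP is a
+ * base-2 exponential, so FOG_SCALE is presumably expected to fold the
+ * log2(e) factor of the classic e^(-density * d) curve into the
+ * density. */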
+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ *ret_pos = pos + 4;
+
+ ssa_temp = program->ssa_count++;
+
+ ins = &program->instructions.elements[pos];
+
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp);
+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT);
+ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1);
+ ins->src[1].reg.idx[0].offset = fog_signature_idx;
+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+
+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_factor);
+ ins->dst[0].modifiers = VKD3DSPDM_SATURATE;
+ src_param_init_ssa_float(&ins->src[0], ssa_temp);
+ ins->src[0].modifiers = VKD3DSPSM_NEG;
+ break;
+
+ case VKD3D_SHADER_FOG_FRAGMENT_EXP2:
+ /* We generate the following code:
+ *
+ * mul sr0, FOG_SCALE, vFOG.x
+ * mul sr1, sr0, sr0
+ * exp_sat srFACTOR, -sr1
+ */
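+ /* That is, factor = saturate(exp2(-(FOG_SCALE * vFOG.x)^2)), matching
+ * the classic e^(-(density * d)^2) curve for an appropriately
+ * pre-scaled FOG_SCALE (EXP is again a base-2 exponential). */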
+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 5))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ *ret_pos = pos + 5;
+
+ ssa_temp = program->ssa_count++;
+ ssa_temp2 = program->ssa_count++;
+
+ ins = &program->instructions.elements[pos];
+
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp);
+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT);
+ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1);
+ ins->src[1].reg.idx[0].offset = fog_signature_idx;
+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+
+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp2);
+ src_param_init_ssa_float(&ins->src[0], ssa_temp);
+ src_param_init_ssa_float(&ins->src[1], ssa_temp);
+
+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1);
+ dst_param_init_ssa_float(&ins->dst[0], ssa_factor);
+ ins->dst[0].modifiers = VKD3DSPDM_SATURATE;
+ src_param_init_ssa_float(&ins->src[0], ssa_temp2);
+ ins->src[0].modifiers = VKD3DSPSM_NEG;
+ break;
+
+ default:
+ vkd3d_unreachable();
+ }
+
+ /* We generate the following code:
+ *
+ * add sr0, FRAG_COLOUR, -FOG_COLOUR
+ * mad oC0, sr0, srFACTOR, FOG_COLOUR
+ */
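+ /* Together these compute oC0 = lerp(FOG_COLOUR, FRAG_COLOUR, FACTOR). */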
+
+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_ADD, 1, 2);
+ dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++);
+ src_param_init_temp_float4(&ins->src[0], colour_temp);
+ src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT);
+ ins->src[1].modifiers = VKD3DSPSM_NEG;
+
+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MAD, 1, 3);
+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, colour_signature_idx,
+ program->output_signature.elements[colour_signature_idx].mask);
+ src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1);
+ src_param_init_ssa_float(&ins->src[1], ssa_factor);
+ src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT);
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ struct vkd3d_shader_message_context *message_context = ctx->message_context;
+ uint32_t colour_signature_idx, fog_signature_idx, colour_temp;
+ const struct vkd3d_shader_parameter1 *mode_parameter = NULL;
+ static const struct vkd3d_shader_location no_loc;
+ const struct signature_element *fog_element;
+ enum vkd3d_shader_fog_fragment_mode mode;
+ struct vkd3d_shader_instruction *ins;
+ size_t new_pos;
+ int ret;
+
+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
+ return VKD3D_OK;
+
+ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx))
+ return VKD3D_OK;
+
+ if (!(mode_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)))
+ return VKD3D_OK;
+
+ if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT)
+ {
+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
+ "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type);
+ return VKD3D_ERROR_NOT_IMPLEMENTED;
+ }
+ if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
+ {
+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
+ "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type);
+ return VKD3D_ERROR_INVALID_ARGUMENT;
+ }
+ mode = mode_parameter->u.immediate_constant.u.u32;
+
+ if (mode == VKD3D_SHADER_FOG_FRAGMENT_NONE)
+ return VKD3D_OK;
+
+ /* Should have been added by vsir_program_add_fog_input(). */
+ if (!(fog_element = vsir_signature_find_element_by_name(&program->input_signature, "FOG", 0)))
+ {
+ ERR("Fog input not found.\n");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+ fog_signature_idx = fog_element - program->input_signature.elements;
+
+ /* We're going to be reading from the colour output, so we need to go
+ * through the whole shader and redirect writes to it into a temp. */
+ colour_temp = program->temp_count++;
+
+ for (size_t i = 0; i < program->instructions.count; ++i)
+ {
+ ins = &program->instructions.elements[i];
+
+ if (vsir_instruction_is_dcl(ins))
+ continue;
+
+ if (ins->opcode == VKD3DSIH_RET)
+ {
+ if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx,
+ colour_signature_idx, colour_temp, &new_pos, message_context)) < 0)
+ return ret;
+ i = new_pos;
+ continue;
+ }
+
+ for (size_t j = 0; j < ins->dst_count; ++j)
+ {
+ struct vkd3d_shader_dst_param *dst = &ins->dst[j];
+
+ /* Note we run after I/O normalization. */
+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx)
+ {
+ dst->reg.type = VKD3DSPR_TEMP;
+ dst->reg.idx[0].offset = colour_temp;
+ }
+ }
+ }
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ struct shader_signature *signature = &program->output_signature;
+ const struct vkd3d_shader_parameter1 *source_parameter;
+ uint32_t register_idx = 0;
+
+ if (!is_pre_rasterization_shader(program->shader_version.type))
+ return VKD3D_OK;
+
+ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE)))
+ return VKD3D_OK;
+
+ if (source_parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT)
+ {
+ enum vkd3d_shader_fog_source source = source_parameter->u.immediate_constant.u.u32;
+
+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG)
+ return VKD3D_OK;
+
+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W
+ && !vsir_signature_find_element_by_name(signature, "COLOR", 1))
+ return VKD3D_OK;
+ }
+
+ if (vsir_signature_find_element_by_name(signature, "FOG", 0))
+ return VKD3D_OK;
+
+ for (unsigned int i = 0; i < signature->element_count; ++i)
+ register_idx = max(register_idx, signature->elements[i].register_index + 1);
+
+ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program,
+ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp,
+ uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos)
+{
+ const struct signature_element *e = &program->output_signature.elements[source_signature_idx];
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ size_t pos = ret - instructions->elements;
+ struct vkd3d_shader_instruction *ins;
+
+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ ins = &program->instructions.elements[pos];
+
+ /* Write the fog output. */
+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1);
+ src_param_init_temp_float4(&ins->src[0], temp);
+ if (source == VKD3D_SHADER_FOG_SOURCE_Z)
+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z);
+ else /* Position or specular W. */
+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W);
+ ++ins;
+
+ /* Write the position or specular output. */
+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type),
+ source_signature_idx, e->mask);
+ src_param_init_temp_float4(&ins->src[0], temp);
+ ++ins;
+
+ *ret_pos = pos + 2;
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ struct vkd3d_shader_message_context *message_context = ctx->message_context;
+ const struct vkd3d_shader_parameter1 *source_parameter = NULL;
+ uint32_t fog_signature_idx, source_signature_idx, temp;
+ static const struct vkd3d_shader_location no_loc;
+ enum vkd3d_shader_fog_source source;
+ const struct signature_element *e;
+
+ if (!is_pre_rasterization_shader(program->shader_version.type))
+ return VKD3D_OK;
+
+ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE)))
+ return VKD3D_OK;
+
+ if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT)
+ {
+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
+ "Unsupported fog source parameter type %#x.", source_parameter->type);
+ return VKD3D_ERROR_NOT_IMPLEMENTED;
+ }
+ if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
+ {
+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
+ "Invalid fog source parameter data type %#x.", source_parameter->data_type);
+ return VKD3D_ERROR_INVALID_ARGUMENT;
+ }
+ source = source_parameter->u.immediate_constant.u.u32;
+
+ TRACE("Fog source %#x.\n", source);
+
+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG)
+ return VKD3D_OK;
+
+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W)
+ {
+ if (program->has_fog || !(e = vsir_signature_find_element_by_name(&program->output_signature, "COLOR", 1)))
+ return VKD3D_OK;
+ source_signature_idx = e - program->output_signature.elements;
+ }
+ else
+ {
+ if (!vsir_signature_find_sysval(&program->output_signature,
+ VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx))
+ {
+ vkd3d_shader_error(ctx->message_context, &no_loc,
+ VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+ }
+
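+ /* Should have been added by vsir_program_add_fog_output(). */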
+ if (!(e = vsir_signature_find_element_by_name(&program->output_signature, "FOG", 0)))
+ {
+ ERR("Fog output not found.\n");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+ fog_signature_idx = e - program->output_signature.elements;
+
+ temp = program->temp_count++;
+
+ /* Insert a fog write before each ret, and convert either specular or
+ * position output to a temp. */
+ for (size_t i = 0; i < program->instructions.count; ++i)
+ {
+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+ if (vsir_instruction_is_dcl(ins))
+ continue;
+
+ if (ins->opcode == VKD3DSIH_RET)
+ {
+ size_t new_pos;
+ int ret;
+
+ if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp,
+ fog_signature_idx, source_signature_idx, &new_pos)) < 0)
+ return ret;
+ i = new_pos;
+ continue;
+ }
+
+ for (size_t j = 0; j < ins->dst_count; ++j)
+ {
+ struct vkd3d_shader_dst_param *dst = &ins->dst[j];
+
+ /* Note we run after I/O normalization. */
+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == source_signature_idx)
+ {
+ dst->reg.type = VKD3DSPR_TEMP;
+ dst->reg.idx[0].offset = temp;
+ }
+ }
+ }
+
+ program->has_fog = true;
+
+ return VKD3D_OK;
+}
+
+struct validation_context
+{
+ struct vkd3d_shader_message_context *message_context;
+ const struct vsir_program *program;
+ size_t instruction_idx;
+ struct vkd3d_shader_location null_location;
+ bool invalid_instruction_idx;
+ enum vkd3d_result status;
+ bool dcl_temps_found;
+ enum vkd3d_shader_opcode phase;
+ bool inside_block;
+
+ struct validation_context_temp_data
+ {
+ enum vsir_dimension dimension;
+ size_t first_seen;
+ } *temps;
+
+ struct validation_context_ssa_data
+ {
+ enum vsir_dimension dimension;
+ enum vkd3d_data_type data_type;
+ size_t first_seen;
+ uint32_t write_mask;
+ uint32_t read_mask;
+ size_t first_assigned;
+ } *ssas;
+
+ enum vkd3d_shader_opcode *blocks;
+ size_t depth;
+ size_t blocks_capacity;
+
+ unsigned int outer_tess_idxs[4];
+ unsigned int inner_tess_idxs[2];
+
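+ /* Which signature element, if any, has claimed each component of each
+ * register in each stream; used to detect overlapping elements. */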
+ struct validation_context_signature_data
+ {
+ struct validation_context_signature_stream_data
+ {
+ struct validation_context_signature_register_data
+ {
+ struct validation_context_signature_component_data
+ {
+ const struct signature_element *element;
+ } components[VKD3D_VEC4_SIZE];
+ } registers[MAX_REG_OUTPUT];
+ } streams[VKD3D_MAX_STREAM_COUNT];
+ } input_signature_data, output_signature_data, patch_constant_signature_data;
+};
+
+static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx,
+ enum vkd3d_shader_error error, const char *format, ...)
+{
+ struct vkd3d_string_buffer buf;
+ va_list args;
+
+ vkd3d_string_buffer_init(&buf);
+
+ va_start(args, format);
+ vkd3d_string_buffer_vprintf(&buf, format, args);
+ va_end(args);
+
+ if (ctx->invalid_instruction_idx)
{
vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer);
WARN("VSIR validation error: %s\n", buf.buffer);
@@ -6707,76 +7254,240 @@ static void vsir_validate_register_without_indices(struct validation_context *ct
reg->idx_count, reg->type);
}
-static void vsir_validate_io_register(struct validation_context *ctx,
- const struct vkd3d_shader_register *reg)
+enum vsir_signature_type
+{
+ SIGNATURE_TYPE_INPUT,
+ SIGNATURE_TYPE_OUTPUT,
+ SIGNATURE_TYPE_PATCH_CONSTANT,
+};
+
+enum vsir_io_reg_type
{
+ REG_V,
+ REG_O,
+ REG_VPC,
+ REG_VICP,
+ REG_VOCP,
+ REG_COUNT,
+};
+
+enum vsir_phase
+{
+ PHASE_NONE,
+ PHASE_CONTROL_POINT,
+ PHASE_FORK,
+ PHASE_JOIN,
+ PHASE_COUNT,
+};
+
+struct vsir_io_register_data
+{
+ unsigned int flags;
+ enum vsir_signature_type signature_type;
const struct shader_signature *signature;
- bool has_control_point = false;
+ unsigned int control_point_count;
+};
- switch (reg->type)
+enum
+{
+ INPUT_BIT = (1u << 0),
+ OUTPUT_BIT = (1u << 1),
+ CONTROL_POINT_BIT = (1u << 2),
+};
+
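+/* Which I/O register types are valid, and which signature each maps to,
+ * indexed by shader type, hull shader phase and register type. A
+ * zero-initialised entry (neither INPUT_BIT nor OUTPUT_BIT set) marks the
+ * register as invalid in that context. */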
+static const struct vsir_io_register_data vsir_sm4_io_register_data
+ [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] =
+{
+ [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] =
+ {
+ [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+ /* According to MSDN, vpc is not allowed in fork phases. However, we
+ * don't really distinguish between fork and join phases, so we
+ * allow it. */
+ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ },
+ [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] =
+ {
+ [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ },
+ [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] =
+ {
+ [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+};
+
+static const struct vsir_io_register_data vsir_sm6_io_register_data
+ [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] =
+{
+ [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {OUTPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+ [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+ [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ },
+ [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT},
+ [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ },
+ [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] =
+ {
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT},
+ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT},
+ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT},
+ },
+};
+
+static bool vsir_get_io_register_data(struct validation_context *ctx,
+ enum vkd3d_shader_register_type register_type, struct vsir_io_register_data *data)
+{
+ const struct vsir_io_register_data (*signature_register_data)
+ [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT];
+ enum vsir_io_reg_type io_reg_type;
+ enum vsir_phase phase;
+
+ if (ctx->program->shader_version.type >= ARRAY_SIZE(*signature_register_data))
+ return false;
+
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
+ signature_register_data = &vsir_sm6_io_register_data;
+ else
+ signature_register_data = &vsir_sm4_io_register_data;
+
+ switch (register_type)
{
- case VKD3DSPR_INPUT:
- signature = &ctx->program->input_signature;
+ case VKD3DSPR_INPUT: io_reg_type = REG_V; break;
+ case VKD3DSPR_OUTPUT: io_reg_type = REG_O; break;
+ case VKD3DSPR_INCONTROLPOINT: io_reg_type = REG_VICP; break;
+ case VKD3DSPR_OUTCONTROLPOINT: io_reg_type = REG_VOCP; break;
+ case VKD3DSPR_PATCHCONST: io_reg_type = REG_VPC; break;
- switch (ctx->program->shader_version.type)
- {
- case VKD3D_SHADER_TYPE_GEOMETRY:
- case VKD3D_SHADER_TYPE_HULL:
- case VKD3D_SHADER_TYPE_DOMAIN:
- has_control_point = true;
- break;
+ default:
+ return false;
+ }
- default:
- break;
- }
- break;
+ switch (ctx->phase)
+ {
+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: phase = PHASE_CONTROL_POINT; break;
+ case VKD3DSIH_HS_FORK_PHASE: phase = PHASE_FORK; break;
+ case VKD3DSIH_HS_JOIN_PHASE: phase = PHASE_JOIN; break;
+ case VKD3DSIH_INVALID: phase = PHASE_NONE; break;
- case VKD3DSPR_OUTPUT:
- switch (ctx->program->shader_version.type)
- {
- case VKD3D_SHADER_TYPE_HULL:
- if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE
- || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO)
- {
- signature = &ctx->program->output_signature;
- has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
- }
- else
- {
- signature = &ctx->program->patch_constant_signature;
- }
- break;
+ default:
+ vkd3d_unreachable();
+ }
- default:
- signature = &ctx->program->output_signature;
- break;
- }
- break;
+ *data = (*signature_register_data)[ctx->program->shader_version.type][phase][io_reg_type];
- case VKD3DSPR_INCONTROLPOINT:
- signature = &ctx->program->input_signature;
- has_control_point = true;
- break;
+ if (!(data->flags & (INPUT_BIT | OUTPUT_BIT)))
+ return false;
- case VKD3DSPR_OUTCONTROLPOINT:
- signature = &ctx->program->output_signature;
- has_control_point = true;
- break;
+ /* VSIR_NORMALISED_HULL_CONTROL_POINT_IO differs from VSIR_NORMALISED_SM4
+ * by just a single flag, so we don't keep a whole copy of the table, but
+ * just patch the SM4 entry when needed. */
+ if (ctx->program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO
+ && ctx->program->shader_version.type == VKD3D_SHADER_TYPE_HULL
+ && phase == PHASE_CONTROL_POINT && io_reg_type == REG_O)
+ {
+ VKD3D_ASSERT(!(data->flags & CONTROL_POINT_BIT));
+ data->flags |= CONTROL_POINT_BIT;
+ }
- case VKD3DSPR_PATCHCONST:
- signature = &ctx->program->patch_constant_signature;
- break;
+ switch (data->signature_type)
+ {
+ case SIGNATURE_TYPE_INPUT:
+ data->signature = &ctx->program->input_signature;
+ data->control_point_count = ctx->program->input_control_point_count;
+ return true;
+
+ case SIGNATURE_TYPE_OUTPUT:
+ data->signature = &ctx->program->output_signature;
+ data->control_point_count = ctx->program->output_control_point_count;
+ return true;
+
+ case SIGNATURE_TYPE_PATCH_CONSTANT:
+ data->signature = &ctx->program->patch_constant_signature;
+ return true;
default:
vkd3d_unreachable();
}
+}
- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO)
+static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg)
+{
+ unsigned int control_point_index, control_point_count;
+ const struct shader_signature *signature;
+ struct vsir_io_register_data io_reg_data;
+ bool has_control_point;
+
+ if (!vsir_get_io_register_data(ctx, reg->type, &io_reg_data))
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid usage of register type %#x.", reg->type);
+ return;
+ }
+
+ signature = io_reg_data.signature;
+ has_control_point = io_reg_data.flags & CONTROL_POINT_BIT;
+ control_point_count = io_reg_data.control_point_count;
+
+ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6)
{
/* Indices are [register] or [control point, register]. Both are
* allowed to have a relative address. */
unsigned int expected_idx_count = 1 + !!has_control_point;
+ control_point_index = 0;
+
if (reg->idx_count != expected_idx_count)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
@@ -6795,7 +7506,7 @@ static void vsir_validate_io_register(struct validation_context *ctx,
/* If the signature element is not an array, indices are
* [signature] or [control point, signature]. If the signature
* element is an array, indices are [array, signature] or
- * [control point, array, signature]. In any case `signature' is
+ * [array, control point, signature]. In any case `signature' is
* not allowed to have a relative address, while the others are.
*/
if (reg->idx_count < 1)
@@ -6829,6 +7540,7 @@ static void vsir_validate_io_register(struct validation_context *ctx,
is_array = true;
expected_idx_count = 1 + !!has_control_point + !!is_array;
+ control_point_index = !!is_array;
if (reg->idx_count != expected_idx_count)
{
@@ -6837,7 +7549,18 @@ static void vsir_validate_io_register(struct validation_context *ctx,
reg->idx_count, reg->type);
return;
}
+
+ if (is_array && !reg->idx[0].rel_addr && reg->idx[0].offset >= element->register_count)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Array index %u exceeds the signature element register count %u in a register of type %#x.",
+ reg->idx[0].offset, element->register_count, reg->type);
}
+
+ if (has_control_point && !reg->idx[control_point_index].rel_addr
+ && reg->idx[control_point_index].offset >= control_point_count)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Control point index %u exceeds the control point count %u in a register of type %#x.",
+ reg->idx[control_point_index].offset, control_point_count, reg->type);
}
static void vsir_validate_temp_register(struct validation_context *ctx,
@@ -7143,8 +7866,26 @@ static void vsir_validate_register(struct validation_context *ctx,
for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i)
{
const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr;
- if (reg->idx[i].rel_addr)
+ if (param)
+ {
vsir_validate_src_param(ctx, param);
+
+ switch (param->reg.type)
+ {
+ case VKD3DSPR_TEMP:
+ case VKD3DSPR_SSA:
+ case VKD3DSPR_ADDR:
+ case VKD3DSPR_LOOP:
+ case VKD3DSPR_OUTPOINTID:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x for a relative address parameter.",
+ param->reg.type);
+ break;
+ }
+ }
}
switch (reg->type)
@@ -7185,6 +7926,10 @@ static void vsir_validate_register(struct validation_context *ctx,
vsir_validate_register_without_indices(ctx, reg);
break;
+ case VKD3DSPR_PRIMID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
case VKD3DSPR_NULL:
vsir_validate_register_without_indices(ctx, reg);
break;
@@ -7201,6 +7946,18 @@ static void vsir_validate_register(struct validation_context *ctx,
vsir_validate_uav_register(ctx, reg);
break;
+ case VKD3DSPR_OUTPOINTID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_FORKINSTID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_JOININSTID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
case VKD3DSPR_INCONTROLPOINT:
vsir_validate_io_register(ctx, reg);
break;
@@ -7213,6 +7970,38 @@ static void vsir_validate_register(struct validation_context *ctx,
vsir_validate_io_register(ctx, reg);
break;
+ case VKD3DSPR_TESSCOORD:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_THREADID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_THREADGROUPID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_LOCALTHREADID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_LOCALTHREADINDEX:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_COVERAGE:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_SAMPLEMASK:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_GSINSTID:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
case VKD3DSPR_DEPTHOUTGE:
vsir_validate_register_without_indices(ctx, reg);
break;
@@ -7221,15 +8010,37 @@ static void vsir_validate_register(struct validation_context *ctx,
vsir_validate_register_without_indices(ctx, reg);
break;
+ case VKD3DSPR_OUTSTENCILREF:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
case VKD3DSPR_SSA:
vsir_validate_ssa_register(ctx, reg);
break;
+ case VKD3DSPR_WAVELANECOUNT:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
+ case VKD3DSPR_WAVELANEINDEX:
+ vsir_validate_register_without_indices(ctx, reg);
+ break;
+
default:
break;
}
}
+static void vsir_validate_io_dst_param(struct validation_context *ctx,
+ const struct vkd3d_shader_dst_param *dst)
+{
+ struct vsir_io_register_data io_reg_data;
+
+ if (!vsir_get_io_register_data(ctx, dst->reg.type, &io_reg_data) || !(io_reg_data.flags & OUTPUT_BIT))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x used as destination parameter.", dst->reg.type);
+}
+
static void vsir_validate_dst_param(struct validation_context *ctx,
const struct vkd3d_shader_dst_param *dst)
{
@@ -7304,15 +8115,28 @@ static void vsir_validate_dst_param(struct validation_context *ctx,
case VKD3DSPR_IMMCONST64:
case VKD3DSPR_SAMPLER:
case VKD3DSPR_RESOURCE:
- case VKD3DSPR_INPUT:
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid %#x register used as destination parameter.", dst->reg.type);
break;
+ case VKD3DSPR_INPUT:
+ vsir_validate_io_dst_param(ctx, dst);
+ break;
+
+ case VKD3DSPR_OUTPUT:
+ vsir_validate_io_dst_param(ctx, dst);
+ break;
+
+ case VKD3DSPR_INCONTROLPOINT:
+ vsir_validate_io_dst_param(ctx, dst);
+ break;
+
+ case VKD3DSPR_OUTCONTROLPOINT:
+ vsir_validate_io_dst_param(ctx, dst);
+ break;
+
case VKD3DSPR_PATCHCONST:
- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
- "PATCHCONST register used as destination parameters are only allowed in Hull Shaders.");
+ vsir_validate_io_dst_param(ctx, dst);
break;
default:
@@ -7320,6 +8144,16 @@ static void vsir_validate_dst_param(struct validation_context *ctx,
}
}
+static void vsir_validate_io_src_param(struct validation_context *ctx,
+ const struct vkd3d_shader_src_param *src)
+{
+ struct vsir_io_register_data io_reg_data;
+
+ if (!vsir_get_io_register_data(ctx, src->reg.type, &io_reg_data) || !(io_reg_data.flags & INPUT_BIT))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x used as source parameter.", src->reg.type);
+}
+
static void vsir_validate_src_param(struct validation_context *ctx,
const struct vkd3d_shader_src_param *src)
{
@@ -7355,18 +8189,24 @@ static void vsir_validate_src_param(struct validation_context *ctx,
"Invalid NULL register used as source parameter.");
break;
+ case VKD3DSPR_INPUT:
+ vsir_validate_io_src_param(ctx, src);
+ break;
+
case VKD3DSPR_OUTPUT:
- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL
- || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE))
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
- "Invalid OUTPUT register used as source parameter.");
+ vsir_validate_io_src_param(ctx, src);
+ break;
+
+ case VKD3DSPR_INCONTROLPOINT:
+ vsir_validate_io_src_param(ctx, src);
+ break;
+
+ case VKD3DSPR_OUTCONTROLPOINT:
+ vsir_validate_io_src_param(ctx, src);
break;
case VKD3DSPR_PATCHCONST:
- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN
- && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
- "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders.");
+ vsir_validate_io_src_param(ctx, src);
break;
default:
@@ -7420,13 +8260,6 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx,
return true;
}
-enum vsir_signature_type
-{
- SIGNATURE_TYPE_INPUT,
- SIGNATURE_TYPE_OUTPUT,
- SIGNATURE_TYPE_PATCH_CONSTANT,
-};
-
static const char * const signature_type_names[] =
{
[SIGNATURE_TYPE_INPUT] = "input",
@@ -7466,17 +8299,37 @@ sysval_validation_data[] =
};
static void vsir_validate_signature_element(struct validation_context *ctx,
- const struct shader_signature *signature, enum vsir_signature_type signature_type,
- unsigned int idx)
+ const struct shader_signature *signature, struct validation_context_signature_data *signature_data,
+ enum vsir_signature_type signature_type, unsigned int idx)
{
+ enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID;
+ bool integer_type = false, is_outer = false, is_gs_output, require_index = true;
const char *signature_type_name = signature_type_names[signature_type];
const struct signature_element *element = &signature->elements[idx];
- bool integer_type = false;
+ unsigned int semantic_index_max = 0, i, j;
if (element->register_count == 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
"element %u of %s signature: Invalid zero register count.", idx, signature_type_name);
+ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6 && element->register_count != 1)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid register count %u.", idx, signature_type_name,
+ element->register_count);
+
+ if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT
+ || MAX_REG_OUTPUT - element->register_index < element->register_count))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid register index %u and count %u.",
+ idx, signature_type_name, element->register_index, element->register_count);
+
+ is_gs_output = ctx->program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY
+ && signature_type == SIGNATURE_TYPE_OUTPUT;
+ if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || (element->stream_index != 0 && !is_gs_output))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid stream index %u.",
+ idx, signature_type_name, element->stream_index);
+
if (element->mask == 0 || (element->mask & ~0xf))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
"element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask);
@@ -7486,33 +8339,27 @@ static void vsir_validate_signature_element(struct validation_context *ctx,
"element %u of %s signature: Non-contiguous mask %#x.",
idx, signature_type_name, element->mask);
- /* Here we'd likely want to validate that the usage mask is a subset of the
- * signature mask. Unfortunately the D3DBC parser sometimes violates this.
- * For example I've seen a shader like this:
- * ps_3_0
- * [...]
- * dcl_texcoord0 v0
- * [...]
- * texld r2.xyzw, v0.xyzw, s1.xyzw
- * [...]
- *
- * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to
- * compute the signature mask, but the texld instruction apparently uses all
- * the components. Of course the last two components are ignored, but
- * formally they seem to be used. So we end up with a signature element with
- * mask .xy and usage mask .xyzw.
- *
- * The correct fix would probably be to make the D3DBC parser aware of which
- * components are really used for each instruction, but that would take some
- * time. */
- if (element->used_mask & ~0xf)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
- "element %u of %s signature: Invalid usage mask %#x.",
- idx, signature_type_name, element->used_mask);
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM4)
+ {
+ if ((element->used_mask & element->mask) != element->used_mask)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid usage mask %#x with mask %#x.",
+ idx, signature_type_name, element->used_mask, element->mask);
+ }
+ else
+ {
+ if (element->used_mask & ~0xf)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid usage mask %#x.",
+ idx, signature_type_name, element->used_mask);
+ }
switch (element->sysval_semantic)
{
case VKD3D_SHADER_SV_NONE:
+ case VKD3D_SHADER_SV_TARGET:
+ break;
+
case VKD3D_SHADER_SV_POSITION:
case VKD3D_SHADER_SV_CLIP_DISTANCE:
case VKD3D_SHADER_SV_CULL_DISTANCE:
@@ -7523,18 +8370,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx,
case VKD3D_SHADER_SV_INSTANCE_ID:
case VKD3D_SHADER_SV_IS_FRONT_FACE:
case VKD3D_SHADER_SV_SAMPLE_INDEX:
+ case VKD3D_SHADER_SV_DEPTH:
+ case VKD3D_SHADER_SV_COVERAGE:
+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL:
+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL:
+ case VKD3D_SHADER_SV_STENCIL_REF:
+ require_index = false;
+ break;
+
case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE:
+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD;
+ semantic_index_max = 4;
+ is_outer = true;
+ break;
+
case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT:
+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD;
+ semantic_index_max = 2;
+ is_outer = false;
+ break;
+
case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE:
+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE;
+ semantic_index_max = 3;
+ is_outer = true;
+ break;
+
case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT:
+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE;
+ semantic_index_max = 1;
+ is_outer = false;
+ break;
+
case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET:
case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN:
- case VKD3D_SHADER_SV_TARGET:
- case VKD3D_SHADER_SV_DEPTH:
- case VKD3D_SHADER_SV_COVERAGE:
- case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL:
- case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL:
- case VKD3D_SHADER_SV_STENCIL_REF:
+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE;
+ semantic_index_max = 2;
+ is_outer = true;
break;
default:
@@ -7544,6 +8416,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx,
break;
}
+ if (require_index && element->register_index == UINT_MAX)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: System value semantic %#x requires a register index.",
+ idx, signature_type_name, element->sysval_semantic);
+
+ if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID)
+ {
+ if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: System value semantic %#x is only valid "
+ "in the patch constant signature.",
+ idx, signature_type_name, element->sysval_semantic);
+
+ if (ctx->program->tess_domain != expected_tess_domain)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.",
+ idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain);
+
+ if (element->semantic_index >= semantic_index_max)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.",
+ idx, signature_type_name, element->semantic_index, element->sysval_semantic);
+ }
+ else
+ {
+ unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index];
+
+ if (*idx_pos != ~0u)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.",
+ idx, signature_type_name, element->semantic_index, element->sysval_semantic);
+ else
+ *idx_pos = idx;
+ }
+ }
+
if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data))
{
const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic];
@@ -7622,6 +8531,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx,
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
"element %u of %s signature: Invalid interpolation mode %#x for integer component type.",
idx, signature_type_name, element->interpolation_mode);
+
+ if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || !require_index)
+ return;
+
+ for (i = element->register_index; i < MAX_REG_OUTPUT
+ && i - element->register_index < element->register_count; ++i)
+ {
+ struct validation_context_signature_stream_data *stream_data = &signature_data->streams[element->stream_index];
+ struct validation_context_signature_register_data *register_data = &stream_data->registers[i];
+
+ for (j = 0; j < VKD3D_VEC4_SIZE; ++j)
+ {
+ struct validation_context_signature_component_data *component_data = &register_data->components[j];
+
+ if (!(element->mask & (1u << j)))
+ continue;
+
+ if (!component_data->element)
+ component_data->element = element;
+ else
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Conflict with element %zu.",
+ idx, signature_type_name, component_data->element - signature->elements);
+ }
+ }
}
static const unsigned int allowed_signature_phases[] =
@@ -7631,8 +8565,8 @@ static const unsigned int allowed_signature_phases[] =
[SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT,
};
-static void vsir_validate_signature(struct validation_context *ctx,
- const struct shader_signature *signature, enum vsir_signature_type signature_type)
+static void vsir_validate_signature(struct validation_context *ctx, const struct shader_signature *signature,
+ struct validation_context_signature_data *signature_data, enum vsir_signature_type signature_type)
{
unsigned int i;
@@ -7642,7 +8576,110 @@ static void vsir_validate_signature(struct validation_context *ctx,
"Unexpected %s signature.", signature_type_names[signature_type]);
for (i = 0; i < signature->element_count; ++i)
- vsir_validate_signature_element(ctx, signature, signature_type, i);
+ vsir_validate_signature_element(ctx, signature, signature_data, signature_type, i);
+
+ if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT)
+ {
+ const struct signature_element *first_element, *element;
+ unsigned int expected_outer_count = 0;
+ unsigned int expected_inner_count = 0;
+
+ switch (ctx->program->tess_domain)
+ {
+ case VKD3D_TESSELLATOR_DOMAIN_QUAD:
+ expected_outer_count = 4;
+ expected_inner_count = 2;
+ break;
+
+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE:
+ expected_outer_count = 3;
+ expected_inner_count = 1;
+ break;
+
+ case VKD3D_TESSELLATOR_DOMAIN_LINE:
+ expected_outer_count = 2;
+ expected_inner_count = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ /* After I/O normalisation, tessellation factors are merged into a single array. */
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
+ {
+ expected_outer_count = min(1, expected_outer_count);
+ expected_inner_count = min(1, expected_inner_count);
+ }
+
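+ /* E.g. a quad domain requires outer tessellation factors 0-3 and inner
+ * factors 0-1, each group on consecutive registers with identical
+ * masks. */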
+ first_element = NULL;
+ for (i = 0; i < expected_outer_count; ++i)
+ {
+ if (ctx->outer_tess_idxs[i] == ~0u)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "Missing outer system value semantic %u.", i);
+ }
+ else
+ {
+ element = &signature->elements[ctx->outer_tess_idxs[i]];
+
+ if (!first_element)
+ {
+ first_element = element;
+ continue;
+ }
+
+ if (element->register_index != first_element->register_index + i)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Invalid register index %u for outer system value semantic %u, expected %u.",
+ element->register_index, i, first_element->register_index + i);
+ }
+
+ if (element->mask != first_element->mask)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Invalid mask %#x for outer system value semantic %u, expected %#x.",
+ element->mask, i, first_element->mask);
+ }
+ }
+ }
+
+ first_element = NULL;
+ for (i = 0; i < expected_inner_count; ++i)
+ {
+ if (ctx->inner_tess_idxs[i] == ~0u)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "Missing inner system value semantic %u.", i);
+ }
+ else
+ {
+ element = &signature->elements[ctx->inner_tess_idxs[i]];
+
+ if (!first_element)
+ {
+ first_element = element;
+ continue;
+ }
+
+ if (element->register_index != first_element->register_index + i)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Invalid register index %u for inner system value semantic %u, expected %u.",
+ element->register_index, i, first_element->register_index + i);
+ }
+
+ if (element->mask != first_element->mask)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Invalid mask %#x for inner system value semantic %u, expected %#x.",
+ element->mask, i, first_element->mask);
+ }
+ }
+ }
+ }
}
static const char *name_from_cf_type(enum vsir_control_flow_type type)
@@ -7754,6 +8791,206 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx,
instruction->declaration.max_tessellation_factor);
}
+static void vsir_validate_dcl_index_range(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ unsigned int i, j, base_register_idx, effective_write_mask = 0, control_point_count, first_component = UINT_MAX;
+ const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range;
+ enum vkd3d_shader_sysval_semantic sysval = ~0u;
+ const struct shader_signature *signature;
+ struct vsir_io_register_data io_reg_data;
+ bool has_control_point;
+
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
+ "DCL_INDEX_RANGE is not allowed with fully normalised input/output.");
+ return;
+ }
+
+ if (range->dst.modifiers != VKD3DSPDM_NONE)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS,
+ "Invalid modifier %#x on a DCL_INDEX_RANGE destination parameter.", range->dst.modifiers);
+
+ if (range->dst.shift != 0)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT,
+ "Invalid shift %u on a DCL_INDEX_RANGE destination parameter.", range->dst.shift);
+
+ if (!vsir_get_io_register_data(ctx, range->dst.reg.type, &io_reg_data))
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in DCL_INDEX_RANGE instruction.",
+ range->dst.reg.type);
+ return;
+ }
+
+ signature = io_reg_data.signature;
+ has_control_point = io_reg_data.flags & CONTROL_POINT_BIT;
+ control_point_count = io_reg_data.control_point_count;
+
+ if (range->dst.reg.idx_count != 1 + !!has_control_point)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
+ "Invalid index count %u in DCL_INDEX_RANGE instruction.",
+ range->dst.reg.idx_count);
+ return;
+ }
+
+ if (range->dst.reg.idx[0].rel_addr || (has_control_point && range->dst.reg.idx[1].rel_addr))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Invalid relative address in DCL_INDEX_RANGE instruction.");
+
+ if (has_control_point)
+ {
+ if (range->dst.reg.idx[0].offset != control_point_count)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Invalid control point index %u in DCL_INDEX_RANGE instruction, expected %u.",
+ range->dst.reg.idx[0].offset, control_point_count);
+ }
+
+ base_register_idx = range->dst.reg.idx[1].offset;
+ }
+ else
+ {
+ base_register_idx = range->dst.reg.idx[0].offset;
+ }
+
+ if (range->register_count < 2)
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE,
+ "Invalid register count %u in DCL_INDEX_RANGE instruction, expected at least 2.",
+ range->register_count);
+ return;
+ }
+
+ /* Check that, for each register in the range, the write mask intersects
+ * at most one (possibly zero) signature element. Keep track of the union
+ * of all signature element masks. */
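+ /* For example, a DCL_INDEX_RANGE whose write mask is .xz over a register
+ * containing elements with masks .xy and .zw intersects two elements at
+ * once and is rejected. */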
+ for (i = 0; i < range->register_count; ++i)
+ {
+ bool found = false;
+
+ for (j = 0; j < signature->element_count; ++j)
+ {
+ const struct signature_element *element = &signature->elements[j];
+
+ if (base_register_idx + i != element->register_index || !(range->dst.write_mask & element->mask))
+ continue;
+
+ if (found)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.",
+ range->dst.write_mask);
+
+ found = true;
+
+ if (first_component == UINT_MAX)
+ first_component = vsir_write_mask_get_component_idx(element->mask);
+ else if (first_component != vsir_write_mask_get_component_idx(element->mask))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Signature masks are not left-aligned within a DCL_INDEX_RANGE.");
+
+ effective_write_mask |= element->mask;
+ }
+ }
+
+ /* Check again that each register has at most one intersection, this time
+ * using the effective write mask. Also check that we have stabilized,
+ * i.e., that the effective write mask now contains all the signature element
+ * masks. This is important for being able to merge all the signature
+ * elements into a single one without conflicts (there is no hard reason why we
+ * couldn't support an effective write mask that stabilizes after more
+ * iterations, but the code would be more complicated, and we avoid that if
+ * we can). */
+ for (i = 0; i < range->register_count; ++i)
+ {
+ bool found = false;
+
+ for (j = 0; j < signature->element_count; ++j)
+ {
+ const struct signature_element *element = &signature->elements[j];
+
+ if (base_register_idx + i != element->register_index || !(effective_write_mask & element->mask))
+ continue;
+
+ if (element->sysval_semantic != VKD3D_SHADER_SV_NONE
+ && !vsir_sysval_semantic_is_tess_factor(element->sysval_semantic))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "Invalid sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE.",
+ element->sysval_semantic);
+
+ if (sysval == ~0u)
+ {
+ sysval = element->sysval_semantic;
+ /* Line density and line detail can be arrayed together. */
+ if (sysval == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN)
+ sysval = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET;
+ }
+ else
+ {
+ if (sysval != element->sysval_semantic)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "Inconsistent sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE, "
+ "%#x was already seen.",
+ element->sysval_semantic, sysval);
+ }
+
+ if (found)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.",
+ range->dst.write_mask);
+
+ found = true;
+
+ if (~effective_write_mask & element->mask)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Invalid write mask %#x on a signature element touched by a "
+ "DCL_INDEX_RANGE instruction with effective write mask %#x.",
+ element->mask, effective_write_mask);
+
+ if (first_component != vsir_write_mask_get_component_idx(element->mask))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+ "Signature element masks are not left-aligned within a DCL_INDEX_RANGE.");
+ }
+ }
+
+ VKD3D_ASSERT(sysval != ~0u);
+}
+
+static void vsir_validate_dcl_input(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.dst.reg.type)
+ {
+ /* Signature input registers. */
+ case VKD3DSPR_INPUT:
+ case VKD3DSPR_INCONTROLPOINT:
+ case VKD3DSPR_OUTCONTROLPOINT:
+ case VKD3DSPR_PATCHCONST:
+ /* Non-signature input registers. */
+ case VKD3DSPR_PRIMID:
+ case VKD3DSPR_FORKINSTID:
+ case VKD3DSPR_JOININSTID:
+ case VKD3DSPR_THREADID:
+ case VKD3DSPR_THREADGROUPID:
+ case VKD3DSPR_LOCALTHREADID:
+ case VKD3DSPR_LOCALTHREADINDEX:
+ case VKD3DSPR_COVERAGE:
+ case VKD3DSPR_TESSCOORD:
+ case VKD3DSPR_OUTPOINTID:
+ case VKD3DSPR_GSINSTID:
+ case VKD3DSPR_WAVELANECOUNT:
+ case VKD3DSPR_WAVELANEINDEX:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_INPUT.",
+ instruction->declaration.dst.reg.type);
+ }
+}
+
static void vsir_validate_dcl_input_primitive(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction)
{
@@ -7763,6 +9000,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx,
instruction->declaration.primitive_type.type);
}
+static void vsir_validate_dcl_input_ps(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.dst.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_INPUT_PS.",
+ instruction->declaration.dst.reg.type);
+ }
+}
+
+static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.register_semantic.reg.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_INPUT_PS_SGV.",
+ instruction->declaration.register_semantic.reg.reg.type);
+ }
+}
+
+static void vsir_validate_dcl_input_ps_siv(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.register_semantic.reg.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.",
+ instruction->declaration.register_semantic.reg.reg.type);
+ }
+}
+
+static void vsir_validate_dcl_input_sgv(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.register_semantic.reg.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_INPUT_SGV.",
+ instruction->declaration.register_semantic.reg.reg.type);
+ }
+}
+
+static void vsir_validate_dcl_input_siv(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.register_semantic.reg.reg.type)
+ {
+ case VKD3DSPR_INPUT:
+ case VKD3DSPR_PATCHCONST:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_INPUT_SIV.",
+ instruction->declaration.register_semantic.reg.reg.type);
+ }
+}
+
+static void vsir_validate_dcl_output(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.dst.reg.type)
+ {
+ /* Signature output registers. */
+ case VKD3DSPR_OUTPUT:
+ case VKD3DSPR_PATCHCONST:
+ /* Non-signature output registers. */
+ case VKD3DSPR_DEPTHOUT:
+ case VKD3DSPR_SAMPLEMASK:
+ case VKD3DSPR_DEPTHOUTGE:
+ case VKD3DSPR_DEPTHOUTLE:
+ case VKD3DSPR_OUTSTENCILREF:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_OUTPUT.",
+ instruction->declaration.dst.reg.type);
+ }
+}
+
static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction)
{
@@ -7772,6 +9108,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte
instruction->declaration.count);
}
+static void vsir_validate_dcl_output_siv(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ switch (instruction->declaration.register_semantic.reg.reg.type)
+ {
+ case VKD3DSPR_OUTPUT:
+ case VKD3DSPR_PATCHCONST:
+ break;
+
+ default:
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+ "Invalid register type %#x in instruction DCL_OUTPUT_SIV.",
+ instruction->declaration.register_semantic.reg.reg.type);
+ }
+}
+
static void vsir_validate_dcl_output_topology(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction)
{
@@ -7801,6 +9153,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx,
|| instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
"Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain);
+
+ if (instruction->declaration.tessellator_domain != ctx->program->tess_domain)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
+ "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.",
+ instruction->declaration.tessellator_domain, ctx->program->tess_domain);
}
static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx,
@@ -8063,8 +9420,17 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[
[VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase},
[VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances},
[VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor},
+ [VKD3DSIH_DCL_INDEX_RANGE] = {0, 0, vsir_validate_dcl_index_range},
+ [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input},
[VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive},
+ [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps},
+ [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv},
+ [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv},
+ [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv},
+ [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv},
+ [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output},
[VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count},
+ [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv},
[VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology},
[VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps},
[VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain},
@@ -8177,6 +9543,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c
.status = VKD3D_OK,
.phase = VKD3DSIH_INVALID,
.invalid_instruction_idx = true,
+ .outer_tess_idxs[0] = ~0u,
+ .outer_tess_idxs[1] = ~0u,
+ .outer_tess_idxs[2] = ~0u,
+ .outer_tess_idxs[3] = ~0u,
+ .inner_tess_idxs[0] = ~0u,
+ .inner_tess_idxs[1] = ~0u,
};
unsigned int i;
@@ -8187,12 +9559,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c
{
case VKD3D_SHADER_TYPE_HULL:
case VKD3D_SHADER_TYPE_DOMAIN:
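+            /* Hull and domain shaders must carry a valid tessellator
+             * domain; every other shader type must leave it unset. */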
+ if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID
+ || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT)
+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
+ "Invalid tessellation domain %#x.", program->tess_domain);
break;
default:
if (program->patch_constant_signature.element_count != 0)
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
"Patch constant signature is only valid for hull and domain shaders.");
+
+ if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID)
+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
+ "Invalid tessellation domain %#x.", program->tess_domain);
}
switch (program->shader_version.type)
@@ -8226,9 +9606,47 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c
program->output_control_point_count);
}
- vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT);
- vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT);
- vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT);
+ vsir_validate_signature(&ctx, &program->input_signature,
+ &ctx.input_signature_data, SIGNATURE_TYPE_INPUT);
+ vsir_validate_signature(&ctx, &program->output_signature,
+ &ctx.output_signature_data, SIGNATURE_TYPE_OUTPUT);
+ vsir_validate_signature(&ctx, &program->patch_constant_signature,
+ &ctx.patch_constant_signature_data, SIGNATURE_TYPE_PATCH_CONSTANT);
+
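+    /* Only register types that have no corresponding signature element may
+     * be declared directly through the io_dcls bitmap; everything else is
+     * expected to go through a signature. */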
+ for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i)
+ {
+ if (!bitmap_is_set(program->io_dcls, i))
+ continue;
+
+ switch (i)
+ {
+ /* Input registers */
+ case VKD3DSPR_PRIMID:
+ case VKD3DSPR_FORKINSTID:
+ case VKD3DSPR_JOININSTID:
+ case VKD3DSPR_THREADID:
+ case VKD3DSPR_THREADGROUPID:
+ case VKD3DSPR_LOCALTHREADID:
+ case VKD3DSPR_LOCALTHREADINDEX:
+ case VKD3DSPR_COVERAGE:
+ case VKD3DSPR_TESSCOORD:
+ case VKD3DSPR_OUTPOINTID:
+ case VKD3DSPR_GSINSTID:
+ case VKD3DSPR_WAVELANECOUNT:
+ case VKD3DSPR_WAVELANEINDEX:
+ /* Output registers */
+ case VKD3DSPR_DEPTHOUT:
+ case VKD3DSPR_SAMPLEMASK:
+ case VKD3DSPR_DEPTHOUTGE:
+ case VKD3DSPR_DEPTHOUTLE:
+ case VKD3DSPR_OUTSTENCILREF:
+ break;
+
+ default:
+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "Invalid input/output declaration %u.", i);
+ }
+ }
if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps))))
goto fail;
@@ -8318,6 +9736,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin
if (program->shader_version.major <= 2)
vsir_transform(&ctx, vsir_program_add_diffuse_output);
+ /* For vsir_program_insert_fragment_fog(). */
+ vsir_transform(&ctx, vsir_program_add_fog_input);
+
+ /* For vsir_program_insert_vertex_fog(). */
+ vsir_transform(&ctx, vsir_program_add_fog_output);
+
return ctx.result;
}
@@ -8372,6 +9796,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t
vsir_transform(&ctx, vsir_program_insert_point_size);
vsir_transform(&ctx, vsir_program_insert_point_size_clamp);
vsir_transform(&ctx, vsir_program_insert_point_coord);
+ vsir_transform(&ctx, vsir_program_insert_fragment_fog);
+ vsir_transform(&ctx, vsir_program_insert_vertex_fog);
if (TRACE_ON())
vsir_program_trace(program);
diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
index df3edeaa4e6..e783128e236 100644
--- a/libs/vkd3d/libs/vkd3d-shader/msl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
@@ -41,6 +41,8 @@ struct msl_generator
const char *prefix;
bool failed;
+ bool write_depth;
+
const struct vkd3d_shader_interface_info *interface_info;
const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info;
};
@@ -153,6 +155,71 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer,
msl_print_register_datatype(buffer, gen, reg->data_type);
break;
+ case VKD3DSPR_DEPTHOUT:
+ if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled depth output in shader type #%x.",
+ gen->program->shader_version.type);
+ vkd3d_string_buffer_printf(buffer, "o_depth");
+ break;
+
+ case VKD3DSPR_IMMCONST:
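+            /* Immediate constants are stored as raw 32-bit words; Metal's
+             * as_type<> reinterprets the bits without conversion. */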
+ switch (reg->dimension)
+ {
+ case VSIR_DIMENSION_SCALAR:
+ switch (reg->data_type)
+ {
+ case VKD3D_DATA_INT:
+ vkd3d_string_buffer_printf(buffer, "as_type<int>(%#xu)", reg->u.immconst_u32[0]);
+ break;
+ case VKD3D_DATA_UINT:
+ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]);
+ break;
+ case VKD3D_DATA_FLOAT:
+ vkd3d_string_buffer_printf(buffer, "as_type<float>(%#xu)", reg->u.immconst_u32[0]);
+ break;
+ default:
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type);
+ vkd3d_string_buffer_printf(buffer, "<unrecognised immconst datatype %#x>", reg->data_type);
+ break;
+ }
+ break;
+
+ case VSIR_DIMENSION_VEC4:
+ switch (reg->data_type)
+ {
+ case VKD3D_DATA_INT:
+ vkd3d_string_buffer_printf(buffer, "as_type<int4>(uint4(%#xu, %#xu, %#xu, %#xu))",
+ reg->u.immconst_u32[0], reg->u.immconst_u32[1],
+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]);
+ break;
+ case VKD3D_DATA_UINT:
+ vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)",
+ reg->u.immconst_u32[0], reg->u.immconst_u32[1],
+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]);
+ break;
+ case VKD3D_DATA_FLOAT:
+ vkd3d_string_buffer_printf(buffer, "as_type<float4>(uint4(%#xu, %#xu, %#xu, %#xu))",
+ reg->u.immconst_u32[0], reg->u.immconst_u32[1],
+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]);
+ break;
+ default:
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type);
+ vkd3d_string_buffer_printf(buffer, "<unrecognised immconst datatype %#x>", reg->data_type);
+ break;
+ }
+ break;
+
+ default:
+ vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension);
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled dimension %#x.", reg->dimension);
+ break;
+ }
+ break;
+
case VKD3DSPR_CONSTBUFFER:
if (reg->idx_count != 3)
{
@@ -215,19 +282,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen,
const struct vkd3d_shader_src_param *vsir_src, uint32_t mask)
{
const struct vkd3d_shader_register *reg = &vsir_src->reg;
+ struct vkd3d_string_buffer *str;
msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers);
if (reg->non_uniform)
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
"Internal compiler error: Unhandled 'non-uniform' modifier.");
- if (vsir_src->modifiers)
- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
- "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers);
- msl_print_register_name(msl_src->str, gen, reg);
+ if (!vsir_src->modifiers)
+ str = msl_src->str;
+ else
+ str = vkd3d_string_buffer_get(&gen->string_buffers);
+
+ msl_print_register_name(str, gen, reg);
if (reg->dimension == VSIR_DIMENSION_VEC4)
- msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask);
+ msl_print_swizzle(str, vsir_src->swizzle, mask);
+
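+    /* Apply the source modifier, if any, by wrapping the complete printed
+     * register expression. */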
+ switch (vsir_src->modifiers)
+ {
+ case VKD3DSPSM_NONE:
+ break;
+ case VKD3DSPSM_NEG:
+ vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer);
+ break;
+ case VKD3DSPSM_ABS:
+ vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer);
+ break;
+ default:
+ vkd3d_string_buffer_printf(msl_src->str, "<unhandled modifier %#x>(%s)",
+ vsir_src->modifiers, str->buffer);
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers);
+ break;
+ }
+
+ if (str != msl_src->str)
+ vkd3d_string_buffer_release(&gen->string_buffers, str);
}
static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache)
@@ -253,7 +344,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen,
msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers);
msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg);
- msl_print_write_mask(msl_dst->mask, write_mask);
+ if (vsir_dst->reg.dimension == VSIR_DIMENSION_VEC4)
+ msl_print_write_mask(msl_dst->mask, write_mask);
return write_mask;
}
@@ -261,22 +353,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen,
static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment(
struct msl_generator *gen, struct msl_dst *dst, const char *format, ...)
{
+ uint32_t modifiers = dst->vsir->modifiers;
va_list args;
if (dst->vsir->shift)
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
"Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift);
- if (dst->vsir->modifiers)
+ if (modifiers & ~VKD3DSPDM_SATURATE)
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
- "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers);
+ "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers);
msl_print_indent(gen->buffer, gen->indent);
vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer);
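+    /* The saturate destination modifier clamps the written result to the
+     * [0.0, 1.0] range. */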
+ if (modifiers & VKD3DSPDM_SATURATE)
+ vkd3d_string_buffer_printf(gen->buffer, "saturate(");
+
va_start(args, format);
vkd3d_string_buffer_vprintf(gen->buffer, format, args);
va_end(args);
+ if (modifiers & VKD3DSPDM_SATURATE)
+ vkd3d_string_buffer_printf(gen->buffer, ")");
+
vkd3d_string_buffer_printf(gen->buffer, ";\n");
}
@@ -288,6 +387,164 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i
"Internal compiler error: Unhandled instruction %#x.", ins->opcode);
}
+static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op)
+{
+ struct msl_src src[2];
+ struct msl_dst dst;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src[0], gen, &ins->src[0], mask);
+ msl_src_init(&src[1], gen, &ins->src[1], mask);
+
+ msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer);
+
+ msl_src_cleanup(&src[1], &gen->string_buffers);
+ msl_src_cleanup(&src[0], &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
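+/* DP2, DP3 and DP4 consume a fixed number of source components, independent
+ * of the destination write mask. */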
+static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, uint32_t src_mask)
+{
+ unsigned int component_count;
+ struct msl_src src[2];
+ struct msl_dst dst;
+ uint32_t dst_mask;
+
+ dst_mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src[0], gen, &ins->src[0], src_mask);
+ msl_src_init(&src[1], gen, &ins->src[1], src_mask);
+
+ if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1)
+ msl_print_assignment(gen, &dst, "float%u(dot(%s, %s))",
+ component_count, src[0].str->buffer, src[1].str->buffer);
+ else
+ msl_print_assignment(gen, &dst, "dot(%s, %s)", src[0].str->buffer, src[1].str->buffer);
+
+ msl_src_cleanup(&src[1], &gen->string_buffers);
+ msl_src_cleanup(&src[0], &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
+static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op)
+{
+ struct vkd3d_string_buffer *args;
+ struct msl_src src;
+ struct msl_dst dst;
+ unsigned int i;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ args = vkd3d_string_buffer_get(&gen->string_buffers);
+
+ for (i = 0; i < ins->src_count; ++i)
+ {
+ msl_src_init(&src, gen, &ins->src[i], mask);
+ vkd3d_string_buffer_printf(args, "%s%s", i ? ", " : "", src.str->buffer);
+ msl_src_cleanup(&src, &gen->string_buffers);
+ }
+
+ msl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer);
+
+ vkd3d_string_buffer_release(&gen->string_buffers, args);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
+static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op)
+{
+ unsigned int mask_size;
+ struct msl_src src[2];
+ struct msl_dst dst;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src[0], gen, &ins->src[0], mask);
+ msl_src_init(&src[1], gen, &ins->src[1], mask);
+
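+    /* Direct3D comparison instructions set all bits of a component for
+     * "true" (0xffffffff) and clear them for "false". */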
+ if ((mask_size = vsir_write_mask_component_count(mask)) > 1)
+ msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))",
+ mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer);
+ else
+ msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u",
+ src[0].str->buffer, op, src[1].str->buffer);
+
+ msl_src_cleanup(&src[1], &gen->string_buffers);
+ msl_src_cleanup(&src[0], &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
+static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor)
+{
+ unsigned int component_count;
+ struct msl_src src;
+ struct msl_dst dst;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src, gen, &ins->src[0], mask);
+
+ if ((component_count = vsir_write_mask_component_count(mask)) > 1)
+ msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer);
+ else
+ msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer);
+
+ msl_src_cleanup(&src, &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
+static void msl_end_block(struct msl_generator *gen)
+{
+ --gen->indent;
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "}\n");
+}
+
+static void msl_begin_block(struct msl_generator *gen)
+{
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "{\n");
+ ++gen->indent;
+}
+
+static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
+{
+ const char *condition;
+ struct msl_src src;
+
+ msl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0);
+
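+    /* VKD3D_SHADER_CONDITIONAL_OP_NZ branches when the condition is
+     * non-zero; otherwise the test is inverted. */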
+ msl_print_indent(gen->buffer, gen->indent);
+ condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? "bool" : "!bool";
+ vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer);
+
+ msl_src_cleanup(&src, &gen->string_buffers);
+
+ msl_begin_block(gen);
+}
+
+static void msl_else(struct msl_generator *gen)
+{
+ msl_end_block(gen);
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "else\n");
+ msl_begin_block(gen);
+}
+
+static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op)
+{
+ struct msl_src src;
+ struct msl_dst dst;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src, gen, &ins->src[0], mask);
+
+ msl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer);
+
+ msl_src_cleanup(&src, &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
{
struct msl_src src;
@@ -303,6 +560,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc
msl_dst_cleanup(&dst, &gen->string_buffers);
}
+static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
+{
+ unsigned int component_count;
+ struct msl_src src[3];
+ struct msl_dst dst;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src[0], gen, &ins->src[0], mask);
+ msl_src_init(&src[1], gen, &ins->src[1], mask);
+ msl_src_init(&src[2], gen, &ins->src[2], mask);
+
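+    /* Metal's select() returns its second argument where the condition is
+     * true, so MOVC picks src[1] where src[0] is non-zero. */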
+ if ((component_count = vsir_write_mask_component_count(mask)) > 1)
+ msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))",
+ src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer);
+ else
+ msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))",
+ src[2].str->buffer, src[1].str->buffer, src[0].str->buffer);
+
+ msl_src_cleanup(&src[2], &gen->string_buffers);
+ msl_src_cleanup(&src[1], &gen->string_buffers);
+ msl_src_cleanup(&src[0], &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
{
msl_print_indent(gen->buffer, gen->indent);
@@ -315,17 +597,119 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d
switch (ins->opcode)
{
- case VKD3DSIH_DCL_INPUT:
- case VKD3DSIH_DCL_OUTPUT:
- case VKD3DSIH_DCL_OUTPUT_SIV:
+ case VKD3DSIH_ADD:
+ msl_binop(gen, ins, "+");
+ break;
+ case VKD3DSIH_AND:
+ msl_binop(gen, ins, "&");
+ break;
case VKD3DSIH_NOP:
break;
+ case VKD3DSIH_DIV:
+ msl_binop(gen, ins, "/");
+ break;
+ case VKD3DSIH_DP2:
+ msl_dot(gen, ins, vkd3d_write_mask_from_component_count(2));
+ break;
+ case VKD3DSIH_DP3:
+ msl_dot(gen, ins, vkd3d_write_mask_from_component_count(3));
+ break;
+ case VKD3DSIH_DP4:
+ msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL);
+ break;
+ case VKD3DSIH_ELSE:
+ msl_else(gen);
+ break;
+ case VKD3DSIH_ENDIF:
+ msl_end_block(gen);
+ break;
+ case VKD3DSIH_IEQ:
+ msl_relop(gen, ins, "==");
+ break;
+ case VKD3DSIH_EXP:
+ msl_intrinsic(gen, ins, "exp2");
+ break;
+ case VKD3DSIH_FRC:
+ msl_intrinsic(gen, ins, "fract");
+ break;
+ case VKD3DSIH_FTOI:
+ msl_cast(gen, ins, "int");
+ break;
+ case VKD3DSIH_FTOU:
+ msl_cast(gen, ins, "uint");
+ break;
+ case VKD3DSIH_GEO:
+ msl_relop(gen, ins, ">=");
+ break;
+ case VKD3DSIH_IF:
+ msl_if(gen, ins);
+ break;
+ case VKD3DSIH_ISHL:
+ msl_binop(gen, ins, "<<");
+ break;
+ case VKD3DSIH_ISHR:
+ case VKD3DSIH_USHR:
+ msl_binop(gen, ins, ">>");
+ break;
+ case VKD3DSIH_LTO:
+ msl_relop(gen, ins, "<");
+ break;
+ case VKD3DSIH_MAD:
+ msl_intrinsic(gen, ins, "fma");
+ break;
+ case VKD3DSIH_MAX:
+ msl_intrinsic(gen, ins, "max");
+ break;
+ case VKD3DSIH_MIN:
+ msl_intrinsic(gen, ins, "min");
+ break;
+ case VKD3DSIH_INE:
+ case VKD3DSIH_NEU:
+ msl_relop(gen, ins, "!=");
+ break;
+ case VKD3DSIH_ITOF:
+ case VKD3DSIH_UTOF:
+ msl_cast(gen, ins, "float");
+ break;
+ case VKD3DSIH_LOG:
+ msl_intrinsic(gen, ins, "log2");
+ break;
case VKD3DSIH_MOV:
msl_mov(gen, ins);
break;
+ case VKD3DSIH_MOVC:
+ msl_movc(gen, ins);
+ break;
+ case VKD3DSIH_MUL:
+ msl_binop(gen, ins, "*");
+ break;
+ case VKD3DSIH_NOT:
+ msl_unary_op(gen, ins, "~");
+ break;
+ case VKD3DSIH_OR:
+ msl_binop(gen, ins, "|");
+ break;
case VKD3DSIH_RET:
msl_ret(gen, ins);
break;
+ case VKD3DSIH_ROUND_NE:
+ msl_intrinsic(gen, ins, "rint");
+ break;
+ case VKD3DSIH_ROUND_NI:
+ msl_intrinsic(gen, ins, "floor");
+ break;
+ case VKD3DSIH_ROUND_PI:
+ msl_intrinsic(gen, ins, "ceil");
+ break;
+ case VKD3DSIH_ROUND_Z:
+ msl_intrinsic(gen, ins, "trunc");
+ break;
+ case VKD3DSIH_RSQ:
+ msl_intrinsic(gen, ins, "rsqrt");
+ break;
+ case VKD3DSIH_SQRT:
+ msl_intrinsic(gen, ins, "sqrt");
+ break;
default:
msl_unhandled(gen, ins);
break;
@@ -489,6 +873,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen)
if (e->sysval_semantic)
{
+ if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE)
+ {
+ if (type != VKD3D_SHADER_TYPE_PIXEL)
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", type);
+
+ msl_print_indent(gen->buffer, 1);
+ vkd3d_string_buffer_printf(buffer, "bool is_front_face [[front_facing]];\n");
+ continue;
+ }
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
"Internal compiler error: Unhandled system value %#x.", e->sysval_semantic);
continue;
@@ -501,13 +895,6 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen)
continue;
}
- if (e->interpolation_mode != VKD3DSIM_NONE)
- {
- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
- "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode);
- continue;
- }
-
if(e->register_count > 1)
{
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
@@ -551,6 +938,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen)
break;
}
+ switch (e->interpolation_mode)
+ {
+ /* The default interpolation attribute. */
+ case VKD3DSIM_LINEAR:
+ case VKD3DSIM_NONE:
+ break;
+ default:
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode);
+ break;
+ }
+
vkd3d_string_buffer_printf(buffer, ";\n");
}
@@ -602,6 +1001,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen)
{
e = &signature->elements[i];
+ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH)
+ {
+ gen->write_depth = true;
+ msl_print_indent(gen->buffer, 1);
+ vkd3d_string_buffer_printf(buffer, "float shader_out_depth [[depth(any)]];\n");
+ continue;
+ }
+
if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
continue;
@@ -690,6 +1097,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen)
vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i);
msl_print_write_mask(buffer, e->mask);
}
+ else if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE)
+ {
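+        /* MSL's [[front_facing]] is a bool, while Direct3D represents
+         * SV_IsFrontFace as 0xffffffff/0 in the first component. */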
+ vkd3d_string_buffer_printf(buffer, ".u = uint4(input.is_front_face ? 0xffffffffu : 0u, 0, 0, 0)");
+ }
else
{
vkd3d_string_buffer_printf(buffer, " = <unhandled sysval %#x>", e->sysval_semantic);
@@ -711,6 +1122,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen)
{
e = &signature->elements[i];
+ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH)
+ {
+ vkd3d_string_buffer_printf(buffer, " output.shader_out_depth = shader_out_depth;\n");
+ continue;
+ }
+
if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
continue;
@@ -770,9 +1187,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen)
vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32);
vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix);
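+    /* Depth is written through a dedicated local, passed by reference to
+     * the main function and copied to the output struct in the epilogue. */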
+ if (gen->write_depth)
+ vkd3d_string_buffer_printf(gen->buffer, " float shader_out_depth;\n");
+
msl_generate_entrypoint_prologue(gen);
vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix);
+ if (gen->write_depth)
+ vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth");
if (gen->descriptor_info->descriptor_count)
vkd3d_string_buffer_printf(gen->buffer, ", descriptors");
vkd3d_string_buffer_printf(gen->buffer, ");\n");
@@ -790,6 +1212,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader
MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL));
+ vkd3d_string_buffer_printf(gen->buffer, "#include <metal_common>\n\n");
+ vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n");
if (gen->program->global_flags)
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
@@ -808,6 +1232,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader
"void %s_main(thread vkd3d_vec4 *v, "
"thread vkd3d_vec4 *o",
gen->prefix);
+ if (gen->write_depth)
+ vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth");
if (gen->descriptor_info->descriptor_count)
vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix);
vkd3d_string_buffer_printf(gen->buffer, ")\n{\n");
@@ -887,7 +1313,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0)
return ret;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0)
return ret;
diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l
index 4a8d0fddae1..d167415c356 100644
--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l
+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l
@@ -20,6 +20,7 @@
%{
+#include "preproc.h"
#include "preproc.tab.h"
#undef ERROR /* defined in wingdi.h */
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index bdfd632ad12..b1caf61d512 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -34,6 +34,32 @@
# include "vulkan/GLSL.std.450.h"
#endif /* HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H */
+#define VKD3D_SPIRV_VERSION_1_0 0x00010000
+#define VKD3D_SPIRV_VERSION_1_3 0x00010300
+#define VKD3D_SPIRV_GENERATOR_ID 18
+#define VKD3D_SPIRV_GENERATOR_VERSION 14
+#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID)
+#ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER
+# define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0
+#endif
+
+#define VKD3D_SPIRV_HEADER_SIZE 5
+
+#define VKD3D_SPIRV_VERSION_MAJOR_SHIFT 16u
+#define VKD3D_SPIRV_VERSION_MAJOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MAJOR_SHIFT)
+#define VKD3D_SPIRV_VERSION_MINOR_SHIFT 8u
+#define VKD3D_SPIRV_VERSION_MINOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MINOR_SHIFT)
+
+#define VKD3D_SPIRV_GENERATOR_ID_SHIFT 16u
+#define VKD3D_SPIRV_GENERATOR_ID_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_ID_SHIFT)
+#define VKD3D_SPIRV_GENERATOR_VERSION_SHIFT 0u
+#define VKD3D_SPIRV_GENERATOR_VERSION_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_VERSION_SHIFT)
+
+#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT 16u
+#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT)
+#define VKD3D_SPIRV_INSTRUCTION_OP_SHIFT 0u
+#define VKD3D_SPIRV_INSTRUCTION_OP_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_OP_SHIFT)
+
#ifdef HAVE_SPIRV_TOOLS
# include "spirv-tools/libspirv.h"
@@ -82,7 +108,7 @@ static uint32_t get_binary_to_text_options(enum vkd3d_shader_compile_option_form
return out;
}
-static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv,
+static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv,
enum vkd3d_shader_spirv_environment environment,
enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out)
{
@@ -143,20 +169,6 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co
return result;
}
-static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv,
- enum vkd3d_shader_spirv_environment environment)
-{
- static const enum vkd3d_shader_compile_option_formatting_flags formatting
- = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER;
- struct vkd3d_shader_code text;
-
- if (!vkd3d_spirv_binary_to_text(spirv, environment, formatting, &text))
- {
- vkd3d_shader_trace_text(text.code, text.size);
- vkd3d_shader_free_shader_code(&text);
- }
-}
-
static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv,
enum vkd3d_shader_spirv_environment environment)
{
@@ -180,14 +192,13 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc
#else
-static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv,
+static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv,
enum vkd3d_shader_spirv_environment environment,
enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out)
{
return VKD3D_ERROR;
}
-static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv,
- enum vkd3d_shader_spirv_environment environment) {}
+
static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv,
enum vkd3d_shader_spirv_environment environment)
{
@@ -196,6 +207,312 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc
#endif /* HAVE_SPIRV_TOOLS */
+struct spirv_colours
+{
+ const char *reset;
+ const char *comment;
+};
+
+struct spirv_parser
+{
+ struct vkd3d_string_buffer_cache string_buffers;
+ struct vkd3d_shader_location location;
+ struct vkd3d_shader_message_context *message_context;
+ enum vkd3d_shader_compile_option_formatting_flags formatting;
+ struct spirv_colours colours;
+ bool failed;
+
+ const uint32_t *code;
+ size_t pos;
+ size_t size;
+
+ struct vkd3d_string_buffer *text;
+};
+
+static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_error(struct spirv_parser *parser,
+ enum vkd3d_shader_error error, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args);
+ va_end(args);
+ parser->failed = true;
+}
+
+static uint32_t spirv_parser_read_u32(struct spirv_parser *parser)
+{
+ if (parser->pos >= parser->size)
+ {
+ parser->failed = true;
+ return 0;
+ }
+
+ return parser->code[parser->pos++];
+}
+
+static void VKD3D_PRINTF_FUNC(2, 3) spirv_parser_print_comment(struct spirv_parser *parser, const char *format, ...)
+{
+ va_list args;
+
+ if (!parser->text)
+ return;
+
+ va_start(args, format);
+ vkd3d_string_buffer_printf(parser->text, "%s; ", parser->colours.comment);
+ vkd3d_string_buffer_vprintf(parser->text, format, args);
+ vkd3d_string_buffer_printf(parser->text, "%s\n", parser->colours.reset);
+ va_end(args);
+}
+
+static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t magic)
+{
+ unsigned int id, version;
+ const char *name;
+
+ id = (magic & VKD3D_SPIRV_GENERATOR_ID_MASK) >> VKD3D_SPIRV_GENERATOR_ID_SHIFT;
+ version = (magic & VKD3D_SPIRV_GENERATOR_VERSION_MASK) >> VKD3D_SPIRV_GENERATOR_VERSION_SHIFT;
+
+ switch (id)
+ {
+ case VKD3D_SPIRV_GENERATOR_ID:
+ name = "Wine VKD3D Shader Compiler";
+ break;
+
+ default:
+ name = NULL;
+ break;
+ }
+
+ if (name)
+ spirv_parser_print_comment(parser, "Generator: %s; %u", name, version);
+ else
+ spirv_parser_print_comment(parser, "Generator: Unknown (%#x); %u", id, version);
+}
+
+static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser)
+{
+ uint32_t magic, version, generator, bound, schema;
+ unsigned int major, minor;
+
+ if (parser->pos > parser->size || parser->size - parser->pos < VKD3D_SPIRV_HEADER_SIZE)
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+ "Unexpected end while reading the SPIR-V header.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
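+    /* The SPIR-V header consists of the magic number, the version token,
+     * the generator magic, the ID bound, and the reserved schema word. */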
+ magic = spirv_parser_read_u32(parser);
+ version = spirv_parser_read_u32(parser);
+ generator = spirv_parser_read_u32(parser);
+ bound = spirv_parser_read_u32(parser);
+ schema = spirv_parser_read_u32(parser);
+
+ if (magic != SpvMagicNumber)
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+ "Invalid magic number %#08x.", magic);
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ if (version & ~(VKD3D_SPIRV_VERSION_MAJOR_MASK | VKD3D_SPIRV_VERSION_MINOR_MASK))
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+ "Invalid version token %#08x.", version);
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ major = (version & VKD3D_SPIRV_VERSION_MAJOR_MASK) >> VKD3D_SPIRV_VERSION_MAJOR_SHIFT;
+ minor = (version & VKD3D_SPIRV_VERSION_MINOR_MASK) >> VKD3D_SPIRV_VERSION_MINOR_SHIFT;
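+    /* Only SPIR-V 1.0 modules are handled for now. */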
+ if (major != 1 || minor > 0)
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED,
+ "Unable to parse SPIR-V version %u.%u.", major, minor);
+ return VKD3D_ERROR_NOT_IMPLEMENTED;
+ }
+
+ if (!bound)
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+ "Invalid zero id bound.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ if (schema)
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED,
+ "Unable to handle instruction schema %#08x.", schema);
+ return VKD3D_ERROR_NOT_IMPLEMENTED;
+ }
+
+ if (parser->formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER)
+ {
+ spirv_parser_print_comment(parser, "SPIR-V");
+ spirv_parser_print_comment(parser, "Version: %u.%u", major, minor);
+ spirv_parser_print_generator(parser, generator);
+ spirv_parser_print_comment(parser, "Bound: %u", bound);
+ spirv_parser_print_comment(parser, "Schema: %u", schema);
+ }
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result spirv_parser_parse_instruction(struct spirv_parser *parser)
+{
+ struct vkd3d_string_buffer *buffer;
+ uint16_t op, count;
+ unsigned int i;
+ uint32_t word;
+
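+    /* The first token of each instruction packs the word count into the
+     * high 16 bits and the opcode into the low 16 bits. */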
+ word = spirv_parser_read_u32(parser);
+ count = (word & VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK) >> VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT;
+ op = (word & VKD3D_SPIRV_INSTRUCTION_OP_MASK) >> VKD3D_SPIRV_INSTRUCTION_OP_SHIFT;
+
+ if (!count)
+ {
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+ "Invalid word count %u.", count);
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
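+    /* The word count includes the initial opcode token itself. */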
+ --count;
+ buffer = vkd3d_string_buffer_get(&parser->string_buffers);
+ for (i = 0; i < count; ++i)
+ {
+ word = spirv_parser_read_u32(parser);
+ vkd3d_string_buffer_printf(buffer, " 0x%08x", word);
+ }
+ spirv_parser_print_comment(parser, "<unrecognised instruction %#x>%s", op, buffer->buffer);
+ vkd3d_string_buffer_release(&parser->string_buffers, buffer);
+
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED,
+ "Unrecognised instruction %#x.", op);
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result spirv_parser_parse(struct spirv_parser *parser, struct vkd3d_shader_code *text)
+{
+ enum vkd3d_result ret;
+
+ if (text)
+ parser->text = vkd3d_string_buffer_get(&parser->string_buffers);
+
+ if ((ret = spirv_parser_read_header(parser)) < 0)
+ goto fail;
+ while (parser->pos < parser->size)
+ {
+ ++parser->location.line;
+ if ((ret = spirv_parser_parse_instruction(parser)) < 0)
+ goto fail;
+ }
+
+ if (parser->failed)
+ {
+ ret = VKD3D_ERROR_INVALID_SHADER;
+ goto fail;
+ }
+
+ if (text)
+ vkd3d_shader_code_from_string_buffer(text, parser->text);
+
+ return VKD3D_OK;
+
+fail:
+ if (parser->text)
+ {
+ if (TRACE_ON())
+ vkd3d_string_buffer_trace(parser->text);
+ vkd3d_string_buffer_release(&parser->string_buffers, parser->text);
+ }
+ return ret;
+}
+
+static void spirv_parser_cleanup(struct spirv_parser *parser)
+{
+ vkd3d_string_buffer_cache_cleanup(&parser->string_buffers);
+}
+
+static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const struct vkd3d_shader_code *source,
+ const char *source_name, enum vkd3d_shader_compile_option_formatting_flags formatting,
+ struct vkd3d_shader_message_context *message_context)
+{
+ static const struct spirv_colours no_colours =
+ {
+ .reset = "",
+ .comment = "",
+ };
+ static const struct spirv_colours colours =
+ {
+ .reset = "\x1b[m",
+ .comment = "\x1b[36m",
+ };
+
+ memset(parser, 0, sizeof(*parser));
+ parser->location.source_name = source_name;
+ parser->message_context = message_context;
+ vkd3d_string_buffer_cache_init(&parser->string_buffers);
+
+ if (source->size % 4)
+ {
+ vkd3d_string_buffer_cache_cleanup(&parser->string_buffers);
+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+ "Shader size %zu is not a multiple of four.", source->size);
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ parser->formatting = formatting;
+ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR)
+ parser->colours = colours;
+ else
+ parser->colours = no_colours;
+ parser->code = source->code;
+ parser->size = source->size / 4;
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv,
+ const char *source_name, enum vkd3d_shader_spirv_environment environment,
+ enum vkd3d_shader_compile_option_formatting_flags formatting,
+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
+{
+ struct spirv_parser parser;
+ enum vkd3d_result ret;
+
+ if (!VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
+ return spirv_tools_binary_to_text(spirv, environment, formatting, out);
+
+ MESSAGE("Creating a SPIR-V parser. This is unsupported; you get to keep all the pieces if it breaks.\n");
+
+ if ((ret = spirv_parser_init(&parser, spirv, source_name, formatting, message_context)) < 0)
+ return ret;
+
+ ret = spirv_parser_parse(&parser, out);
+
+ spirv_parser_cleanup(&parser);
+
+ return ret;
+}
+
+static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment)
+{
+ static const enum vkd3d_shader_compile_option_formatting_flags formatting
+ = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER;
+ struct vkd3d_shader_message_context message_context;
+ struct vkd3d_shader_code text;
+
+ vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO);
+
+ if (!vkd3d_spirv_binary_to_text(spirv, NULL, environment, formatting, &text, &message_context))
+ {
+ vkd3d_shader_trace_text(text.code, text.size);
+ vkd3d_shader_free_shader_code(&text);
+ }
+
+ vkd3d_shader_message_context_cleanup(&message_context);
+}
+
enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval,
unsigned int index)
{
@@ -247,12 +564,6 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d
}
}
-#define VKD3D_SPIRV_VERSION_1_0 0x00010000
-#define VKD3D_SPIRV_VERSION_1_3 0x00010300
-#define VKD3D_SPIRV_GENERATOR_ID 18
-#define VKD3D_SPIRV_GENERATOR_VERSION 14
-#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID)
-
struct vkd3d_spirv_stream
{
uint32_t *words;
@@ -2406,6 +2717,7 @@ struct vkd3d_hull_shader_variables
struct ssa_register_info
{
enum vkd3d_data_type data_type;
+ uint8_t write_mask;
uint32_t id;
};
@@ -2471,7 +2783,6 @@ struct spirv_compiler
bool emit_point_size;
enum vkd3d_shader_opcode phase;
- bool emit_default_control_point_phase;
struct vkd3d_shader_phase control_point_phase;
struct vkd3d_shader_phase patch_constant_phase;
@@ -3316,13 +3627,19 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler,
static const struct vkd3d_spec_constant_info
{
enum vkd3d_shader_parameter_name name;
- uint32_t default_value;
+ union
+ {
+ uint32_t u;
+ float f;
+ } default_value;
const char *debug_name;
}
vkd3d_shader_parameters[] =
{
- {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"},
- {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"},
+ {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, {.u = 1}, "sample_count"},
+ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, {.f = 0.0f}, "alpha_test_ref"},
+ {VKD3D_SHADER_PARAMETER_NAME_FOG_END, {.f = 1.0f}, "fog_end"},
+ {VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, {.f = 1.0f}, "fog_scale"},
};
static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name)
@@ -3383,7 +3700,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile
const struct vkd3d_spec_constant_info *info;
info = get_spec_constant_info(name);
- default_value = info ? info->default_value : 0;
+ default_value = info ? info->default_value.u : 0;
scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1);
vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count);
@@ -3574,6 +3891,24 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler,
register_info->is_aggregate = false;
return true;
}
+ else if (reg->type == VKD3DSPR_SSA)
+ {
+ const struct ssa_register_info *ssa = &compiler->ssa_register_info[reg->idx[0].offset];
+
+ if (!ssa->id)
+ {
+ /* Should only be from a missing instruction implementation. */
+ VKD3D_ASSERT(compiler->failed);
+            return false;
+ }
+
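+        /* An SSA register resolves to the id of an already computed value
+         * rather than to a variable; SpvStorageClassMax serves as a
+         * sentinel, since no real storage class applies. */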
+ memset(register_info, 0, sizeof(*register_info));
+ register_info->id = ssa->id;
+ register_info->storage_class = SpvStorageClassMax;
+ register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type);
+ register_info->write_mask = ssa->write_mask;
+ return true;
+ }
vkd3d_symbol_make_register(&reg_symbol, reg);
if (!(entry = rb_get(&compiler->symbol_table, &reg_symbol)))
@@ -4181,67 +4516,14 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil
return const_id;
}
-static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler,
- const struct vkd3d_shader_register *reg)
-{
- VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count);
- VKD3D_ASSERT(reg->idx_count == 1);
- return &compiler->ssa_register_info[reg->idx[0].offset];
-}
-
static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler,
- const struct vkd3d_shader_register *reg, uint32_t val_id)
+ const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id)
{
unsigned int i = reg->idx[0].offset;
VKD3D_ASSERT(i < compiler->ssa_register_count);
compiler->ssa_register_info[i].data_type = reg->data_type;
compiler->ssa_register_info[i].id = val_id;
-}
-
-static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler,
- const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type,
- uint32_t swizzle)
-{
- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- enum vkd3d_shader_component_type reg_component_type;
- const struct ssa_register_info *ssa;
- unsigned int component_idx;
- uint32_t type_id, val_id;
-
- ssa = spirv_compiler_get_ssa_register_info(compiler, reg);
- val_id = ssa->id;
- if (!val_id)
- {
- /* Should only be from a missing instruction implementation. */
- VKD3D_ASSERT(compiler->failed);
- return 0;
- }
- VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg));
-
- reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type);
-
- if (reg->dimension == VSIR_DIMENSION_SCALAR)
- {
- if (component_type != reg_component_type)
- {
- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1);
- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id);
- }
-
- return val_id;
- }
-
- if (component_type != reg_component_type)
- {
- /* Required for resource loads with sampled type int, because DXIL has no signedness.
- * Only 128-bit vector sizes are used. */
- type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE);
- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id);
- }
-
- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1);
- component_idx = vsir_swizzle_get_component(swizzle, 0);
- return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx);
+ compiler->ssa_register_info[i].write_mask = write_mask;
}
static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
@@ -4267,9 +4549,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
component_count = vsir_write_mask_component_count(write_mask);
component_type = vkd3d_component_type_from_data_type(reg->data_type);
- if (reg->type == VKD3DSPR_SSA)
- return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle);
-
if (!spirv_compiler_get_register_info(compiler, reg, &reg_info))
{
type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
@@ -4294,9 +4573,9 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
type_id = vkd3d_spirv_get_type_id(builder,
reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask));
val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone);
+ swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle;
}
- swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle;
val_id = spirv_compiler_emit_swizzle(compiler,
val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask);
@@ -4497,7 +4776,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler,
if (reg->type == VKD3DSPR_SSA)
{
- spirv_compiler_set_ssa_register_info(compiler, reg, val_id);
+ spirv_compiler_set_ssa_register_info(compiler, reg, write_mask, val_id);
return;
}
@@ -4883,35 +5162,36 @@ static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin =
static const struct
{
enum vkd3d_shader_register_type reg_type;
+ SpvStorageClass storage_class;
struct vkd3d_spirv_builtin builtin;
}
vkd3d_register_builtins[] =
{
- {VKD3DSPR_THREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}},
- {VKD3DSPR_LOCALTHREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}},
- {VKD3DSPR_LOCALTHREADINDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}},
- {VKD3DSPR_THREADGROUPID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}},
+ {VKD3DSPR_THREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}},
+ {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}},
+ {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}},
+ {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}},
- {VKD3DSPR_GSINSTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}},
- {VKD3DSPR_OUTPOINTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}},
+ {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}},
+ {VKD3DSPR_OUTPOINTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}},
- {VKD3DSPR_PRIMID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}},
+ {VKD3DSPR_PRIMID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}},
- {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}},
+ {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}},
- {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}},
+ {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}},
- {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
- {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
+ {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
+ {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
- {VKD3DSPR_DEPTHOUT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
- {VKD3DSPR_DEPTHOUTGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
- {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
+ {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
+ {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
+ {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}},
- {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}},
+ {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}},
- {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}},
- {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}},
+ {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}},
+ {VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}},
};
static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler,
@@ -4970,14 +5250,18 @@ static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_sysval(
}
static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_register(
- enum vkd3d_shader_register_type reg_type)
+ enum vkd3d_shader_register_type reg_type, SpvStorageClass *storage_class)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(vkd3d_register_builtins); ++i)
{
if (vkd3d_register_builtins[i].reg_type == reg_type)
+ {
+ if (storage_class)
+ *storage_class = vkd3d_register_builtins[i].storage_class;
return &vkd3d_register_builtins[i].builtin;
+ }
}
return NULL;
@@ -4990,7 +5274,7 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp
if ((builtin = get_spirv_builtin_for_sysval(compiler, sysval)))
return builtin;
- if ((builtin = get_spirv_builtin_for_register(reg_type)))
+ if ((builtin = get_spirv_builtin_for_register(reg_type, NULL)))
return builtin;
if ((sysval != VKD3D_SHADER_SV_NONE && sysval != VKD3D_SHADER_SV_TARGET)
@@ -5322,21 +5606,26 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
return input_id;
}
-static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler,
+static void spirv_compiler_emit_io_register(struct spirv_compiler *compiler,
const struct vkd3d_shader_dst_param *dst)
{
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
const struct vkd3d_shader_register *reg = &dst->reg;
const struct vkd3d_spirv_builtin *builtin;
struct vkd3d_symbol reg_symbol;
+ SpvStorageClass storage_class;
+ uint32_t write_mask, id;
struct rb_entry *entry;
- uint32_t write_mask;
- uint32_t input_id;
VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr);
VKD3D_ASSERT(reg->idx_count < 2);
- if (!(builtin = get_spirv_builtin_for_register(reg->type)))
+ if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE)
+ {
+ builtin = &vkd3d_output_point_size_builtin;
+ storage_class = SpvStorageClassOutput;
+ }
+ else if (!(builtin = get_spirv_builtin_for_register(reg->type, &storage_class)))
{
FIXME("Unhandled register %#x.\n", reg->type);
return;
@@ -5347,14 +5636,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler,
if ((entry = rb_get(&compiler->symbol_table, &reg_symbol)))
return;
- input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassInput, 0);
+ id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, 0);
write_mask = vkd3d_write_mask_from_component_count(builtin->component_count);
- vkd3d_symbol_set_register_info(&reg_symbol, input_id,
- SpvStorageClassInput, builtin->component_type, write_mask);
+ vkd3d_symbol_set_register_info(&reg_symbol, id,
+ storage_class, builtin->component_type, write_mask);
reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size;
spirv_compiler_put_symbol(compiler, &reg_symbol);
- spirv_compiler_emit_register_debug_name(builder, input_id, reg);
+ spirv_compiler_emit_register_execution_mode(compiler, reg->type);
+ spirv_compiler_emit_register_debug_name(builder, id, reg);
}
static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler,
@@ -5458,41 +5748,6 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *
}
}
-static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler,
- const struct vkd3d_shader_dst_param *dst)
-{
- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- const struct vkd3d_shader_register *reg = &dst->reg;
- const struct vkd3d_spirv_builtin *builtin;
- struct vkd3d_symbol reg_symbol;
- uint32_t write_mask;
- uint32_t output_id;
-
- VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr);
- VKD3D_ASSERT(reg->idx_count < 2);
-
- if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE)
- {
- builtin = &vkd3d_output_point_size_builtin;
- }
- else if (!(builtin = get_spirv_builtin_for_register(reg->type)))
- {
- FIXME("Unhandled register %#x.\n", reg->type);
- return;
- }
-
- output_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0);
-
- vkd3d_symbol_make_register(&reg_symbol, reg);
- write_mask = vkd3d_write_mask_from_component_count(builtin->component_count);
- vkd3d_symbol_set_register_info(&reg_symbol, output_id,
- SpvStorageClassOutput, builtin->component_type, write_mask);
- reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size;
- spirv_compiler_put_symbol(compiler, &reg_symbol);
- spirv_compiler_emit_register_execution_mode(compiler, reg->type);
- spirv_compiler_emit_register_debug_name(builder, output_id, reg);
-}
-
static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler,
const struct vkd3d_spirv_builtin *builtin, const unsigned int *array_sizes, unsigned int size_count)
{
@@ -5857,16 +6112,6 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler *
compiler->epilogue_function_id = 0;
}
-static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *compiler)
-{
- struct vkd3d_shader_dst_param dst;
-
- memset(&dst, 0, sizeof(dst));
- vsir_register_init(&dst.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0);
- dst.write_mask = VKD3DSP_WRITEMASK_0;
- spirv_compiler_emit_input_register(compiler, &dst);
-}
-
static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler)
{
const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info;
@@ -5879,7 +6124,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp
break;
case VKD3D_SHADER_TYPE_HULL:
vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl);
- spirv_compiler_emit_hull_shader_builtins(compiler);
break;
case VKD3D_SHADER_TYPE_DOMAIN:
vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation);
@@ -6699,27 +6943,6 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi
tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init);
}
-static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler,
- const struct vkd3d_shader_instruction *instruction)
-{
- const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst;
-
- /* INPUT and PATCHCONST are handled in spirv_compiler_emit_io_declarations().
- * OUTPOINTID is handled in spirv_compiler_emit_hull_shader_builtins(). */
- if (dst->reg.type != VKD3DSPR_INPUT && dst->reg.type != VKD3DSPR_PATCHCONST
- && dst->reg.type != VKD3DSPR_OUTPOINTID)
- spirv_compiler_emit_input_register(compiler, dst);
-}
-
-static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler,
- const struct vkd3d_shader_instruction *instruction)
-{
- const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst;
-
- if (dst->reg.type != VKD3DSPR_OUTPUT && dst->reg.type != VKD3DSPR_PATCHCONST)
- spirv_compiler_emit_output_register(compiler, dst);
-}
-
static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler,
const struct vkd3d_shader_instruction *instruction)
{
@@ -6822,15 +7045,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler
compiler->spirv_builder.invocation_count = instruction->declaration.count;
}
-static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler,
- const struct vkd3d_shader_instruction *instruction)
+static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler,
+ enum vkd3d_tessellator_domain domain)
{
- enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain;
SpvExecutionMode mode;
- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler))
- return;
-
switch (domain)
{
case VKD3D_TESSELLATOR_DOMAIN_LINE:
@@ -6916,15 +7135,10 @@ static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compile
SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size));
}
-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler);
-
static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler)
{
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase)
- spirv_compiler_emit_default_control_point_phase(compiler);
-
vkd3d_spirv_build_op_function_end(builder);
if (is_in_control_point_phase(compiler))
@@ -6969,9 +7183,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler,
phase->function_id = function_id;
/* The insertion location must be set after the label is emitted. */
phase->function_location = 0;
-
- if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE)
- compiler->emit_default_control_point_phase = instruction->flags;
}
static void spirv_compiler_initialise_block(struct spirv_compiler *compiler)
@@ -7000,63 +7211,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler)
}
}
-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler)
-{
- const struct shader_signature *output_signature = &compiler->output_signature;
- const struct shader_signature *input_signature = &compiler->input_signature;
- uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id;
- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- enum vkd3d_shader_component_type component_type;
- struct vkd3d_shader_src_param invocation;
- struct vkd3d_shader_register input_reg;
- unsigned int component_count;
- unsigned int i;
-
- vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
- spirv_compiler_initialise_block(compiler);
- invocation_id = spirv_compiler_emit_load_invocation_id(compiler);
-
- memset(&invocation, 0, sizeof(invocation));
- vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_INT, 0);
- invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE;
-
- vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2);
- input_reg.idx[0].offset = 0;
- input_reg.idx[0].rel_addr = &invocation;
- input_reg.idx[1].offset = 0;
- input_id = spirv_compiler_get_register_id(compiler, &input_reg);
-
- VKD3D_ASSERT(input_signature->element_count == output_signature->element_count);
- for (i = 0; i < output_signature->element_count; ++i)
- {
- const struct signature_element *output = &output_signature->elements[i];
- const struct signature_element *input = &input_signature->elements[i];
- struct vkd3d_shader_register_info output_reg_info;
- struct vkd3d_shader_register output_reg;
-
- VKD3D_ASSERT(input->mask == output->mask);
- VKD3D_ASSERT(input->component_type == output->component_type);
-
- input_reg.idx[1].offset = i;
- input_id = spirv_compiler_get_register_id(compiler, &input_reg);
-
- vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
- output_reg.idx[0].offset = i;
- spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info);
-
- component_type = output->component_type;
- component_count = vsir_write_mask_component_count(output->mask);
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
- output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id);
-
- dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id);
-
- vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone);
- }
-
- vkd3d_spirv_build_op_return(builder);
-}
-
static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler,
SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics)
{
@@ -7105,12 +7259,13 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler
static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler)
{
+ size_t table_count = compiler->offset_info.descriptor_table_count;
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
uint32_t void_id;
     /* If a patch constant function used descriptor indexing, the offsets must be reloaded. */
- memset(compiler->descriptor_offset_ids, 0, compiler->offset_info.descriptor_table_count
- * sizeof(*compiler->descriptor_offset_ids));
+ if (table_count)
+ memset(compiler->descriptor_offset_ids, 0, table_count * sizeof(*compiler->descriptor_offset_ids));
vkd3d_spirv_builder_begin_main_function(builder);
vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
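/* Editor's note: the table_count guard added above is not just cosmetic;
 * descriptor_offset_ids may be NULL when no descriptor tables exist, and
 * memset(NULL, 0, 0) is undefined behaviour in C, so the call must be
 * skipped entirely. (Rationale inferred from the change itself.) */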
@@ -7147,7 +7302,6 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru
{VKD3DSIH_DDIV, SpvOpFDiv},
{VKD3DSIH_DIV, SpvOpFDiv},
{VKD3DSIH_DMUL, SpvOpFMul},
- {VKD3DSIH_DTOF, SpvOpFConvert},
{VKD3DSIH_DTOI, SpvOpConvertFToS},
{VKD3DSIH_DTOU, SpvOpConvertFToU},
{VKD3DSIH_FREM, SpvOpFRem},
@@ -7501,7 +7655,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler,
general_implementation:
write_mask = dst->write_mask;
- if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type))
+ if (data_type_is_64_bit(src->reg.data_type) && !data_type_is_64_bit(dst->reg.data_type))
write_mask = vsir_write_mask_64_from_32(write_mask);
else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type))
write_mask = vsir_write_mask_32_from_64(write_mask);
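/* Editor's note: a minimal model of the mask conversion used above, under
 * the assumption (consistent with vsir usage) that one 64-bit component
 * occupies two adjacent 32-bit components. The demo_ helpers are
 * illustrative, not vkd3d's implementations. */
#include <stdint.h>

/* .xyzw (32-bit components) -> .xy (64-bit components): x|y maps to x,
 * z|w maps to y. */
static uint32_t demo_mask_64_from_32(uint32_t m32)
{
    return ((m32 & 0x3) ? 0x1 : 0) | ((m32 & 0xc) ? 0x2 : 0);
}

/* The inverse: each 64-bit component expands to a pair of 32-bit ones. */
static uint32_t demo_mask_32_from_64(uint32_t m64)
{
    return ((m64 & 0x1) ? 0x3 : 0) | ((m64 & 0x2) ? 0xc : 0);
}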
@@ -7785,6 +7939,7 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler,
uint32_t src_type_id, dst_type_id, condition_type_id;
enum vkd3d_shader_component_type component_type;
unsigned int component_count;
+ uint32_t write_mask;
VKD3D_ASSERT(instruction->dst_count == 1);
VKD3D_ASSERT(instruction->src_count == 1);
@@ -7794,21 +7949,23 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler,
* and for NaN to yield zero. */
component_count = vsir_write_mask_component_count(dst->write_mask);
- src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask);
- dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst);
- src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask);
if (src->reg.data_type == VKD3D_DATA_DOUBLE)
{
+ write_mask = vkd3d_write_mask_from_component_count(component_count);
int_min_id = spirv_compiler_get_constant_double_vector(compiler, -2147483648.0, component_count);
float_max_id = spirv_compiler_get_constant_double_vector(compiler, 2147483648.0, component_count);
}
else
{
+ write_mask = dst->write_mask;
int_min_id = spirv_compiler_get_constant_float_vector(compiler, -2147483648.0f, component_count);
float_max_id = spirv_compiler_get_constant_float_vector(compiler, 2147483648.0f, component_count);
}
+ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask);
+ dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst);
+ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask);
val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, int_min_id);
/* VSIR allows the destination of a signed conversion to be unsigned. */
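/* Editor's note: a scalar sketch of the saturating conversion the code above
 * implements for FTOI (clamp to the int32 range, NaN yields zero); this
 * models the intended semantics, not the exact SPIR-V emitted. */
#include <math.h>
#include <stdint.h>

static int32_t demo_ftoi_sat(double x)
{
    if (isnan(x))
        return 0;                /* NaN is mapped to zero. */
    if (x <= -2147483648.0)
        return INT32_MIN;        /* Saturate at -2^31. */
    if (x >= 2147483648.0)
        return INT32_MAX;        /* 2^31 and above saturate to 2^31 - 1. */
    return (int32_t)x;           /* In range: truncate towards zero. */
}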
@@ -7838,6 +7995,7 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler,
const struct vkd3d_shader_src_param *src = instruction->src;
uint32_t src_type_id, dst_type_id, condition_type_id;
unsigned int component_count;
+ uint32_t write_mask;
VKD3D_ASSERT(instruction->dst_count == 1);
VKD3D_ASSERT(instruction->src_count == 1);
@@ -7847,21 +8005,23 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler,
* and for NaN to yield zero. */
component_count = vsir_write_mask_component_count(dst->write_mask);
- src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask);
- dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst);
- src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask);
if (src->reg.data_type == VKD3D_DATA_DOUBLE)
{
+ write_mask = vkd3d_write_mask_from_component_count(component_count);
zero_id = spirv_compiler_get_constant_double_vector(compiler, 0.0, component_count);
float_max_id = spirv_compiler_get_constant_double_vector(compiler, 4294967296.0, component_count);
}
else
{
+ write_mask = dst->write_mask;
zero_id = spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count);
float_max_id = spirv_compiler_get_constant_float_vector(compiler, 4294967296.0f, component_count);
}
+ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask);
+ dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst);
+ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask);
val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, zero_id);
uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT_MAX, component_count);
@@ -7875,6 +8035,29 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler,
spirv_compiler_emit_store_dst(compiler, dst, val_id);
}
+static void spirv_compiler_emit_dtof(struct spirv_compiler *compiler,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
+ const struct vkd3d_shader_dst_param *dst = instruction->dst;
+ const struct vkd3d_shader_src_param *src = instruction->src;
+ uint32_t type_id, val_id, src_id;
+ unsigned int component_count;
+ uint32_t write_mask;
+
+ component_count = vsir_write_mask_component_count(dst->write_mask);
+ write_mask = vkd3d_write_mask_from_component_count(component_count);
+
+ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask);
+
+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count);
+ val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpFConvert, type_id, src_id);
+ if (instruction->flags & VKD3DSI_PRECISE_XYZW)
+ vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0);
+
+ spirv_compiler_emit_store_dst(compiler, dst, val_id);
+}
+
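/* Editor's note: the new DTOF handler above derives its load mask with
 * vkd3d_write_mask_from_component_count(); judging by its uses in this file,
 * that helper simply sets the n lowest mask bits. An illustrative equivalent
 * (assumed, not copied from vkd3d): */
#include <assert.h>
#include <stdint.h>

static uint32_t demo_write_mask_from_component_count(unsigned int count)
{
    assert(count >= 1 && count <= 4);
    return (1u << count) - 1;    /* 1 -> .x, 2 -> .xy, 3 -> .xyz, 4 -> .xyzw */
}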
static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *compiler,
const struct vkd3d_shader_instruction *instruction)
{
@@ -10214,13 +10397,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
case VKD3DSIH_DCL_TGSM_STRUCTURED:
spirv_compiler_emit_dcl_tgsm_structured(compiler, instruction);
break;
- case VKD3DSIH_DCL_INPUT_PS:
- case VKD3DSIH_DCL_INPUT:
- spirv_compiler_emit_dcl_input(compiler, instruction);
- break;
- case VKD3DSIH_DCL_OUTPUT:
- spirv_compiler_emit_dcl_output(compiler, instruction);
- break;
case VKD3DSIH_DCL_STREAM:
spirv_compiler_emit_dcl_stream(compiler, instruction);
break;
@@ -10239,9 +10415,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT:
spirv_compiler_emit_output_vertex_count(compiler, instruction);
break;
- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN:
- spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction);
- break;
case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE:
spirv_compiler_emit_tessellator_output_primitive(compiler,
instruction->declaration.tessellator_output_primitive);
@@ -10275,7 +10448,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
case VKD3DSIH_DDIV:
case VKD3DSIH_DIV:
case VKD3DSIH_DMUL:
- case VKD3DSIH_DTOF:
case VKD3DSIH_FREM:
case VKD3DSIH_FTOD:
case VKD3DSIH_IADD:
@@ -10363,6 +10535,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
case VKD3DSIH_FTOU:
spirv_compiler_emit_ftou(compiler, instruction);
break;
+ case VKD3DSIH_DTOF:
+ spirv_compiler_emit_dtof(compiler, instruction);
+ break;
case VKD3DSIH_DEQO:
case VKD3DSIH_DGEO:
case VKD3DSIH_DLT:
@@ -10561,11 +10736,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
break;
case VKD3DSIH_DCL_HS_MAX_TESSFACTOR:
case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT:
- case VKD3DSIH_DCL_INPUT_SGV:
- case VKD3DSIH_DCL_INPUT_SIV:
- case VKD3DSIH_DCL_INPUT_PS_SGV:
- case VKD3DSIH_DCL_INPUT_PS_SIV:
- case VKD3DSIH_DCL_OUTPUT_SIV:
case VKD3DSIH_DCL_RESOURCE_RAW:
case VKD3DSIH_DCL_RESOURCE_STRUCTURED:
case VKD3DSIH_DCL_UAV_RAW:
@@ -10586,6 +10756,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler)
{
+ struct vkd3d_shader_dst_param dst;
+
for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i)
spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i);
@@ -10609,19 +10781,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler)
if (compiler->program->has_point_size)
{
- struct vkd3d_shader_dst_param dst;
-
vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
- spirv_compiler_emit_output_register(compiler, &dst);
+ spirv_compiler_emit_io_register(compiler, &dst);
}
if (compiler->program->has_point_coord)
{
- struct vkd3d_shader_dst_param dst;
-
vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0);
- spirv_compiler_emit_input_register(compiler, &dst);
+ spirv_compiler_emit_io_register(compiler, &dst);
+ }
+
+ for (unsigned int i = 0; i < sizeof(compiler->program->io_dcls) * CHAR_BIT; ++i)
+ {
+        /* For hull shaders, we internally generate references to OUTPOINTID,
+         * so it must always be enabled. */
+ if (bitmap_is_set(compiler->program->io_dcls, i)
+ || (compiler->program->shader_version.type == VKD3D_SHADER_TYPE_HULL && i == VKD3DSPR_OUTPOINTID))
+ {
+ vsir_dst_param_init(&dst, i, VKD3D_DATA_FLOAT, 0);
+ spirv_compiler_emit_io_register(compiler, &dst);
+ }
}
}
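/* Editor's note: a minimal model of the io_dcls bitmap test used in the loop
 * above, assuming the usual uint32_t-array bitmap layout (one bit per
 * register type; a set bit means that register type was declared). The demo_
 * name is illustrative only. */
#include <stdbool.h>
#include <stdint.h>

static bool demo_bitmap_is_set(const uint32_t *map, unsigned int idx)
{
    return map[idx >> 5] & (1u << (idx & 0x1f));
}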
@@ -10677,7 +10857,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
compile_info, compiler->message_context)) < 0)
return result;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count);
if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info))))
@@ -10743,6 +10923,10 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
compiler->input_control_point_count = program->input_control_point_count;
compiler->output_control_point_count = program->output_control_point_count;
+ if (program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN
+ || (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)))
+ spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain);
+
if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL)
spirv_compiler_emit_shader_signature_outputs(compiler);
@@ -10823,7 +11007,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT)
{
struct vkd3d_shader_code text;
- if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK)
+ if (vkd3d_spirv_binary_to_text(spirv, compile_info->source_name, environment,
+ compiler->formatting, &text, compiler->message_context) != VKD3D_OK)
return VKD3D_ERROR;
vkd3d_shader_free_shader_code(spirv);
*spirv = text;
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index 9c41e2c2053..1ecfe32de45 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -21,9 +21,7 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
-#include "hlsl.h"
#include "vkd3d_shader_private.h"
-#include "d3dcommon.h"
#define SM4_MAX_SRC_COUNT 6
#define SM4_MAX_DST_COUNT 2
@@ -163,24 +161,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT);
#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu)
-/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */
-#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2
-
-#define VKD3D_SM4_REQUIRES_DOUBLES 0x00000001
-#define VKD3D_SM4_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002
-#define VKD3D_SM4_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004
-#define VKD3D_SM4_REQUIRES_64_UAVS 0x00000008
-#define VKD3D_SM4_REQUIRES_MINIMUM_PRECISION 0x00000010
-#define VKD3D_SM4_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020
-#define VKD3D_SM4_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040
-#define VKD3D_SM4_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080
-#define VKD3D_SM4_REQUIRES_TILED_RESOURCES 0x00000100
-#define VKD3D_SM4_REQUIRES_STENCIL_REF 0x00000200
-#define VKD3D_SM4_REQUIRES_INNER_COVERAGE 0x00000400
-#define VKD3D_SM4_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800
-#define VKD3D_SM4_REQUIRES_ROVS 0x00001000
-#define VKD3D_SM4_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000
-
enum vkd3d_sm4_opcode
{
VKD3D_SM4_OP_ADD = 0x00,
@@ -707,6 +687,7 @@ struct vkd3d_sm4_opcode_info
char src_info[SM4_MAX_SRC_COUNT];
void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv);
+ bool is_conditional_op;
};
static const enum vkd3d_primitive_type output_primitive_type_table[] =
@@ -1268,6 +1249,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi
{
ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
>> VKD3D_SM5_TESSELLATOR_SHIFT;
+ priv->p.program->tess_domain = ins->declaration.tessellator_domain;
}
static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode,
@@ -1275,6 +1257,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins
{
ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
>> VKD3D_SM5_TESSELLATOR_SHIFT;
+ priv->p.program->tess_partitioning = ins->declaration.tessellator_partitioning;
}
static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode,
@@ -1282,6 +1265,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader
{
ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
>> VKD3D_SM5_TESSELLATOR_SHIFT;
+ priv->p.program->tess_output_primitive = ins->declaration.tessellator_output_primitive;
}
static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode,
@@ -1407,8 +1391,6 @@ struct sm4_stat
struct tpf_compiler
{
- /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */
- struct hlsl_ctx *ctx;
struct vsir_program *program;
struct vkd3d_sm4_lookup_tables lookup;
struct sm4_stat *stat;
@@ -1439,18 +1421,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
{VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"},
{VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""},
{VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u",
- shader_sm4_read_conditional_op},
+ shader_sm4_read_conditional_op, true},
{VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u",
shader_sm4_read_case_condition},
{VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""},
{VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u",
- shader_sm4_read_conditional_op},
+ shader_sm4_read_conditional_op, true},
{VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""},
{VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""},
{VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"},
{VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"},
{VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u",
- shader_sm4_read_conditional_op},
+ shader_sm4_read_conditional_op, true},
{VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"},
{VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"},
{VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"},
@@ -1468,7 +1450,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
{VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"},
{VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"},
{VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u",
- shader_sm4_read_conditional_op},
+ shader_sm4_read_conditional_op, true},
{VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"},
{VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"},
{VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"},
@@ -1502,7 +1484,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
{VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"},
{VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""},
{VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u",
- shader_sm4_read_conditional_op},
+ shader_sm4_read_conditional_op, true},
{VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"},
{VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"},
{VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"},
@@ -1967,16 +1949,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t
return lookup->register_type_info_from_vkd3d[vkd3d_type];
}
-static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type(
- const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type)
-{
- const struct vkd3d_sm4_register_type_info *register_type_info =
- get_info_from_vkd3d_register_type(lookup, vkd3d_type);
-
- VKD3D_ASSERT(register_type_info);
- return register_type_info->default_src_swizzle_type;
-}
-
static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode(
const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode)
{
@@ -2816,7 +2788,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro
/* Estimate instruction count to avoid reallocation in most shaders. */
if (!vsir_program_init(program, compile_info,
- &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
return false;
vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name);
sm4->ptr = sm4->start;
@@ -2925,6 +2897,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
program->input_signature = dxbc_desc.input_signature;
program->output_signature = dxbc_desc.output_signature;
program->patch_constant_signature = dxbc_desc.patch_constant_signature;
+ program->features = dxbc_desc.features;
memset(&dxbc_desc, 0, sizeof(dxbc_desc));
/* DXBC stores used masks inverted for output signatures, for some reason.
@@ -2993,8 +2966,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
return VKD3D_OK;
}
-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block);
-
bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx)
{
@@ -3217,18 +3188,17 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s
return true;
}
-static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc,
- uint32_t tag, struct vkd3d_bytecode_buffer *buffer)
+static void add_section(struct tpf_compiler *tpf, uint32_t tag, struct vkd3d_bytecode_buffer *buffer)
{
/* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN
* sections to be aligned. Without this, the sections themselves will be
* aligned, but their reported sizes won't. */
size_t size = bytecode_align(buffer);
- dxbc_writer_add_section(dxbc, tag, buffer->data, size);
+ dxbc_writer_add_section(&tpf->dxbc, tag, buffer->data, size);
if (buffer->status < 0)
- ctx->result = buffer->status;
+ tpf->result = buffer->status;
}
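/* Editor's note: bytecode_align() pads the buffer to the next 4-byte
 * boundary and returns the aligned size, which is why the ISGN/OSGN sizes
 * reported above come out aligned; the usual power-of-two round-up
 * (assumed semantics, shown for illustration):
 *     aligned = (size + 3) & ~(size_t)3;
 */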
static int signature_element_pointer_compare(const void *x, const void *y)
@@ -3289,747 +3259,33 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si
set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset);
}
- add_section(tpf->ctx, &tpf->dxbc, tag, &buffer);
+ add_section(tpf, tag, &buffer);
vkd3d_free(sorted_elements);
}
-static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
-{
- switch (type->class)
- {
- case HLSL_CLASS_MATRIX:
- VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
- return D3D_SVC_MATRIX_COLUMNS;
- else
- return D3D_SVC_MATRIX_ROWS;
- case HLSL_CLASS_SCALAR:
- return D3D_SVC_SCALAR;
- case HLSL_CLASS_VECTOR:
- return D3D_SVC_VECTOR;
-
- case HLSL_CLASS_ARRAY:
- case HLSL_CLASS_DEPTH_STENCIL_STATE:
- case HLSL_CLASS_DEPTH_STENCIL_VIEW:
- case HLSL_CLASS_EFFECT_GROUP:
- case HLSL_CLASS_ERROR:
- case HLSL_CLASS_STRUCT:
- case HLSL_CLASS_PASS:
- case HLSL_CLASS_PIXEL_SHADER:
- case HLSL_CLASS_RASTERIZER_STATE:
- case HLSL_CLASS_RENDER_TARGET_VIEW:
- case HLSL_CLASS_SAMPLER:
- case HLSL_CLASS_STRING:
- case HLSL_CLASS_TECHNIQUE:
- case HLSL_CLASS_TEXTURE:
- case HLSL_CLASS_UAV:
- case HLSL_CLASS_VERTEX_SHADER:
- case HLSL_CLASS_VOID:
- case HLSL_CLASS_CONSTANT_BUFFER:
- case HLSL_CLASS_COMPUTE_SHADER:
- case HLSL_CLASS_DOMAIN_SHADER:
- case HLSL_CLASS_HULL_SHADER:
- case HLSL_CLASS_GEOMETRY_SHADER:
- case HLSL_CLASS_BLEND_STATE:
- case HLSL_CLASS_NULL:
- break;
- }
- vkd3d_unreachable();
-}
-
-static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
-{
- switch (type->e.numeric.type)
- {
- case HLSL_TYPE_BOOL:
- return D3D_SVT_BOOL;
- case HLSL_TYPE_DOUBLE:
- return D3D_SVT_DOUBLE;
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_HALF:
- return D3D_SVT_FLOAT;
- case HLSL_TYPE_INT:
- return D3D_SVT_INT;
- case HLSL_TYPE_UINT:
- return D3D_SVT_UINT;
- default:
- vkd3d_unreachable();
- }
-}
-
-static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type)
-{
- const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
- const char *name = array_type->name ? array_type->name : "<unnamed>";
- const struct hlsl_profile_info *profile = ctx->profile;
- unsigned int array_size = 0;
- size_t name_offset = 0;
- size_t i;
-
- if (type->bytecode_offset)
- return;
-
- if (profile->major_version >= 5)
- name_offset = put_string(buffer, name);
-
- if (type->class == HLSL_CLASS_ARRAY)
- array_size = hlsl_get_multiarray_size(type);
-
- if (array_type->class == HLSL_CLASS_STRUCT)
- {
- unsigned int field_count = 0;
- size_t fields_offset = 0;
-
- for (i = 0; i < array_type->e.record.field_count; ++i)
- {
- struct hlsl_struct_field *field = &array_type->e.record.fields[i];
-
- if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
- continue;
-
- field->name_bytecode_offset = put_string(buffer, field->name);
- write_sm4_type(ctx, buffer, field->type);
- ++field_count;
- }
-
- fields_offset = bytecode_align(buffer);
-
- for (i = 0; i < array_type->e.record.field_count; ++i)
- {
- struct hlsl_struct_field *field = &array_type->e.record.fields[i];
-
- if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
- continue;
-
- put_u32(buffer, field->name_bytecode_offset);
- put_u32(buffer, field->type->bytecode_offset);
- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float));
- }
- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID));
- put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type)));
- put_u32(buffer, vkd3d_make_u32(array_size, field_count));
- put_u32(buffer, fields_offset);
- }
- else
- {
- VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC);
- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type)));
- put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx));
- put_u32(buffer, vkd3d_make_u32(array_size, 0));
- put_u32(buffer, 1);
- }
-
- if (profile->major_version >= 5)
- {
- put_u32(buffer, 0); /* FIXME: unknown */
- put_u32(buffer, 0); /* FIXME: unknown */
- put_u32(buffer, 0); /* FIXME: unknown */
- put_u32(buffer, 0); /* FIXME: unknown */
- put_u32(buffer, name_offset);
- }
-}
-
-static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
-{
- switch (type->class)
- {
- case HLSL_CLASS_SAMPLER:
- return D3D_SIT_SAMPLER;
- case HLSL_CLASS_TEXTURE:
- return D3D_SIT_TEXTURE;
- case HLSL_CLASS_UAV:
- return D3D_SIT_UAV_RWTYPED;
- default:
- break;
- }
-
- vkd3d_unreachable();
-}
-
-static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type)
-{
- const struct hlsl_type *format = type->e.resource.format;
-
- switch (format->e.numeric.type)
- {
- case HLSL_TYPE_DOUBLE:
- return VKD3D_SM4_DATA_DOUBLE;
-
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_HALF:
- if (format->modifiers & HLSL_MODIFIER_UNORM)
- return VKD3D_SM4_DATA_UNORM;
- if (format->modifiers & HLSL_MODIFIER_SNORM)
- return VKD3D_SM4_DATA_SNORM;
- return VKD3D_SM4_DATA_FLOAT;
-
- case HLSL_TYPE_INT:
- return VKD3D_SM4_DATA_INT;
- break;
-
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_UINT:
- return VKD3D_SM4_DATA_UINT;
-
- default:
- vkd3d_unreachable();
- }
-}
-
-static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type)
-{
- switch (type->sampler_dim)
- {
- case HLSL_SAMPLER_DIM_1D:
- return D3D_SRV_DIMENSION_TEXTURE1D;
- case HLSL_SAMPLER_DIM_2D:
- return D3D_SRV_DIMENSION_TEXTURE2D;
- case HLSL_SAMPLER_DIM_3D:
- return D3D_SRV_DIMENSION_TEXTURE3D;
- case HLSL_SAMPLER_DIM_CUBE:
- return D3D_SRV_DIMENSION_TEXTURECUBE;
- case HLSL_SAMPLER_DIM_1DARRAY:
- return D3D_SRV_DIMENSION_TEXTURE1DARRAY;
- case HLSL_SAMPLER_DIM_2DARRAY:
- return D3D_SRV_DIMENSION_TEXTURE2DARRAY;
- case HLSL_SAMPLER_DIM_2DMS:
- return D3D_SRV_DIMENSION_TEXTURE2DMS;
- case HLSL_SAMPLER_DIM_2DMSARRAY:
- return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY;
- case HLSL_SAMPLER_DIM_CUBEARRAY:
- return D3D_SRV_DIMENSION_TEXTURECUBEARRAY;
- case HLSL_SAMPLER_DIM_BUFFER:
- case HLSL_SAMPLER_DIM_RAW_BUFFER:
- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
- return D3D_SRV_DIMENSION_BUFFER;
- default:
- vkd3d_unreachable();
- }
-}
-
-struct extern_resource
-{
-    /* var is non-NULL only if this resource is a whole variable, so it may be
-     * responsible for more than one component. */
- const struct hlsl_ir_var *var;
- const struct hlsl_buffer *buffer;
-
- char *name;
- bool is_user_packed;
-
- /* The data type of a single component of the resource.
- * This might be different from the data type of the resource itself in 4.0
- * profiles, where an array (or multi-dimensional array) is handled as a
- * single resource, unlike in 5.0. */
- struct hlsl_type *component_type;
-
- enum hlsl_regset regset;
- unsigned int id, space, index, bind_count;
-
- struct vkd3d_shader_location loc;
-};
-
-static int sm4_compare_extern_resources(const void *a, const void *b)
-{
- const struct extern_resource *aa = (const struct extern_resource *)a;
- const struct extern_resource *bb = (const struct extern_resource *)b;
- int r;
-
- if ((r = vkd3d_u32_compare(aa->regset, bb->regset)))
- return r;
-
- if ((r = vkd3d_u32_compare(aa->space, bb->space)))
- return r;
-
- return vkd3d_u32_compare(aa->index, bb->index);
-}
-
-static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count)
-{
- unsigned int i;
-
- for (i = 0; i < count; ++i)
- vkd3d_free(extern_resources[i].name);
- vkd3d_free(extern_resources);
-}
-
-static const char *string_skip_tag(const char *string)
-{
- if (!strncmp(string, "<resource>", strlen("<resource>")))
- return string + strlen("<resource>");
- return string;
-}
-
-static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count)
-{
- bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0;
- struct extern_resource *extern_resources = NULL;
- const struct hlsl_ir_var *var;
- struct hlsl_buffer *buffer;
- enum hlsl_regset regset;
- size_t capacity = 0;
- char *name;
-
- *count = 0;
-
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
- if (separate_components)
- {
- unsigned int component_count = hlsl_type_component_count(var->data_type);
- unsigned int k, regset_offset;
-
- for (k = 0; k < component_count; ++k)
- {
- struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k);
- struct vkd3d_string_buffer *name_buffer;
-
- if (!hlsl_type_is_resource(component_type))
- continue;
-
- regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
-
- if (regset_offset > var->regs[regset].allocation_size)
- continue;
-
- if (var->objects_usage[regset][regset_offset].used)
- {
- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1,
- sizeof(*extern_resources))))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- return NULL;
- }
-
- if (!(name_buffer = hlsl_component_to_string(ctx, var, k)))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- return NULL;
- }
- if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer))))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- hlsl_release_string_buffer(ctx, name_buffer);
- return NULL;
- }
- hlsl_release_string_buffer(ctx, name_buffer);
-
- extern_resources[*count].var = NULL;
- extern_resources[*count].buffer = NULL;
-
- extern_resources[*count].name = name;
- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type;
-
- extern_resources[*count].component_type = component_type;
-
- extern_resources[*count].regset = regset;
- extern_resources[*count].id = var->regs[regset].id;
- extern_resources[*count].space = var->regs[regset].space;
- extern_resources[*count].index = var->regs[regset].index + regset_offset;
- extern_resources[*count].bind_count = 1;
- extern_resources[*count].loc = var->loc;
-
- ++*count;
- }
- }
- }
- else
- {
- unsigned int r;
-
- if (!hlsl_type_is_resource(var->data_type))
- continue;
-
- for (r = 0; r <= HLSL_REGSET_LAST; ++r)
- {
- if (!var->regs[r].allocated)
- continue;
-
- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1,
- sizeof(*extern_resources))))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- return NULL;
- }
-
- if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name))))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- return NULL;
- }
-
- extern_resources[*count].var = var;
- extern_resources[*count].buffer = NULL;
-
- extern_resources[*count].name = name;
- /* For some reason 5.1 resources aren't marked as
- * user-packed, but cbuffers still are. */
- extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1)
- && !!var->reg_reservation.reg_type;
-
- extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0);
-
- extern_resources[*count].regset = r;
- extern_resources[*count].id = var->regs[r].id;
- extern_resources[*count].space = var->regs[r].space;
- extern_resources[*count].index = var->regs[r].index;
- extern_resources[*count].bind_count = var->bind_count[r];
- extern_resources[*count].loc = var->loc;
-
- ++*count;
- }
- }
- }
-
- LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry)
- {
- if (!buffer->reg.allocated)
- continue;
-
- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1,
- sizeof(*extern_resources))))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- return NULL;
- }
-
- if (!(name = hlsl_strdup(ctx, buffer->name)))
- {
- sm4_free_extern_resources(extern_resources, *count);
- *count = 0;
- return NULL;
- }
-
- extern_resources[*count].var = NULL;
- extern_resources[*count].buffer = buffer;
-
- extern_resources[*count].name = name;
- extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type;
-
- extern_resources[*count].component_type = NULL;
-
- extern_resources[*count].regset = HLSL_REGSET_NUMERIC;
- extern_resources[*count].id = buffer->reg.id;
- extern_resources[*count].space = buffer->reg.space;
- extern_resources[*count].index = buffer->reg.index;
- extern_resources[*count].bind_count = 1;
- extern_resources[*count].loc = buffer->loc;
-
- ++*count;
- }
-
- qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources);
- return extern_resources;
-}
-
-/* For some reason, for matrices, values from default value initializers end up in different
- * components than from regular initializers. Default value initializers fill the matrix in
- * vertical reading order (left-to-right top-to-bottom) instead of regular reading order
- * (top-to-bottom left-to-right), so they have to be adjusted.
- * An exception is that the order of matrix initializers for function parameters is row-major
- * (top-to-bottom left-to-right). */
-static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index)
-{
- unsigned int element_comp_count, element, x, y, i;
- unsigned int base = 0;
-
- switch (type->class)
- {
- case HLSL_CLASS_MATRIX:
- x = index / type->dimy;
- y = index % type->dimy;
- return y * type->dimx + x;
-
- case HLSL_CLASS_ARRAY:
- element_comp_count = hlsl_type_component_count(type->e.array.type);
- element = index / element_comp_count;
- base = element * element_comp_count;
- return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base);
-
- case HLSL_CLASS_STRUCT:
- for (i = 0; i < type->e.record.field_count; ++i)
- {
- struct hlsl_type *field_type = type->e.record.fields[i].type;
-
- element_comp_count = hlsl_type_component_count(field_type);
- if (index - base < element_comp_count)
- return base + get_component_index_from_default_initializer_index(field_type, index - base);
- base += element_comp_count;
- }
- break;
-
- default:
- return index;
- }
- vkd3d_unreachable();
-}
-
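/* Editor's note: a worked example of the transposition performed by the
 * removed helper above, for a matrix with dimx = 3 and dimy = 2 (i.e. the
 * default-value stream fills columns first): source index i maps to
 * component (i % dimy) * dimx + (i / dimy), so
 *     i:      0  1  2  3  4  5
 *     maps to 0  3  1  4  2  5
 * and the stream {a, b, c, d, e, f} lands as rows {a, c, e} and {b, d, f}. */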
-static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-{
- uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t);
- size_t cbuffers_offset, resources_offset, creator_offset, string_offset;
- unsigned int cbuffer_count = 0, extern_resources_count, i, j;
- size_t cbuffer_position, resource_position, creator_position;
- const struct hlsl_profile_info *profile = ctx->profile;
- struct vkd3d_bytecode_buffer buffer = {0};
- struct extern_resource *extern_resources;
- const struct hlsl_buffer *cbuffer;
- const struct hlsl_ir_var *var;
-
- static const uint16_t target_types[] =
- {
- 0xffff, /* PIXEL */
- 0xfffe, /* VERTEX */
- 0x4753, /* GEOMETRY */
- 0x4853, /* HULL */
- 0x4453, /* DOMAIN */
- 0x4353, /* COMPUTE */
- };
-
- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
-
- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
- {
- if (cbuffer->reg.allocated)
- ++cbuffer_count;
- }
-
- put_u32(&buffer, cbuffer_count);
- cbuffer_position = put_u32(&buffer, 0);
- put_u32(&buffer, extern_resources_count);
- resource_position = put_u32(&buffer, 0);
- put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version),
- target_types[profile->type]));
- put_u32(&buffer, 0); /* FIXME: compilation flags */
- creator_position = put_u32(&buffer, 0);
-
- if (profile->major_version >= 5)
- {
- put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11);
- put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */
- put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */
- put_u32(&buffer, binding_desc_size); /* size of binding desc */
- put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */
- put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */
- put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */
- put_u32(&buffer, 0); /* unknown; possibly a null terminator */
- }
-
- /* Bound resources. */
-
- resources_offset = bytecode_align(&buffer);
- set_u32(&buffer, resource_position, resources_offset);
-
- for (i = 0; i < extern_resources_count; ++i)
- {
- const struct extern_resource *resource = &extern_resources[i];
- uint32_t flags = 0;
-
- if (resource->is_user_packed)
- flags |= D3D_SIF_USERPACKED;
-
- put_u32(&buffer, 0); /* name */
- if (resource->buffer)
- put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER);
- else
- put_u32(&buffer, sm4_resource_type(resource->component_type));
- if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS)
- {
- unsigned int dimx = resource->component_type->e.resource.format->dimx;
-
- put_u32(&buffer, sm4_data_type(resource->component_type));
- put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type));
- put_u32(&buffer, ~0u); /* FIXME: multisample count */
- flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT;
- }
- else
- {
- put_u32(&buffer, 0);
- put_u32(&buffer, 0);
- put_u32(&buffer, 0);
- }
- put_u32(&buffer, resource->index);
- put_u32(&buffer, resource->bind_count);
- put_u32(&buffer, flags);
-
- if (hlsl_version_ge(ctx, 5, 1))
- {
- put_u32(&buffer, resource->space);
- put_u32(&buffer, resource->id);
- }
- }
-
- for (i = 0; i < extern_resources_count; ++i)
- {
- const struct extern_resource *resource = &extern_resources[i];
-
- string_offset = put_string(&buffer, resource->name);
- set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset);
- }
-
- /* Buffers. */
-
- cbuffers_offset = bytecode_align(&buffer);
- set_u32(&buffer, cbuffer_position, cbuffers_offset);
- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
- {
- unsigned int var_count = 0;
-
- if (!cbuffer->reg.allocated)
- continue;
-
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC])
- ++var_count;
- }
-
- put_u32(&buffer, 0); /* name */
- put_u32(&buffer, var_count);
- put_u32(&buffer, 0); /* variable offset */
- put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float));
- put_u32(&buffer, 0); /* FIXME: flags */
- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER);
- }
-
- i = 0;
- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
- {
- if (!cbuffer->reg.allocated)
- continue;
-
- string_offset = put_string(&buffer, cbuffer->name);
- set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset);
- }
-
- i = 0;
- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
- {
- size_t vars_start = bytecode_align(&buffer);
-
- if (!cbuffer->reg.allocated)
- continue;
-
- set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start);
-
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC])
- {
- uint32_t flags = 0;
-
- if (var->is_read)
- flags |= D3D_SVF_USED;
-
- put_u32(&buffer, 0); /* name */
- put_u32(&buffer, var->buffer_offset * sizeof(float));
- put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float));
- put_u32(&buffer, flags);
- put_u32(&buffer, 0); /* type */
- put_u32(&buffer, 0); /* default value */
-
- if (profile->major_version >= 5)
- {
- put_u32(&buffer, 0); /* texture start */
- put_u32(&buffer, 0); /* texture count */
- put_u32(&buffer, 0); /* sampler start */
- put_u32(&buffer, 0); /* sampler count */
- }
- }
- }
-
- j = 0;
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC])
- {
- const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6);
- size_t var_offset = vars_start + j * var_size * sizeof(uint32_t);
-
- string_offset = put_string(&buffer, var->name);
- set_u32(&buffer, var_offset, string_offset);
- write_sm4_type(ctx, &buffer, var->data_type);
- set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset);
-
- if (var->default_values)
- {
- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
- unsigned int comp_count = hlsl_type_component_count(var->data_type);
- unsigned int default_value_offset;
- unsigned int k;
-
- default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t));
- set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset);
-
- for (k = 0; k < comp_count; ++k)
- {
- struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
- unsigned int comp_offset, comp_index;
- enum hlsl_regset regset;
-
- if (comp_type->class == HLSL_CLASS_STRING)
- {
- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
- "Cannot write string default value.");
- continue;
- }
-
- comp_index = get_component_index_from_default_initializer_index(var->data_type, k);
- comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, &regset);
- if (regset == HLSL_REGSET_NUMERIC)
- {
- if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE)
- hlsl_fixme(ctx, &var->loc, "Write double default values.");
-
- set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t),
- var->default_values[k].number.u);
- }
- }
- }
- ++j;
- }
- }
- }
-
- creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL));
- set_u32(&buffer, creator_position, creator_offset);
-
- add_section(ctx, dxbc, TAG_RDEF, &buffer);
-
- sm4_free_extern_resources(extern_resources, extern_resources_count);
-}
-
-static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type)
+static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type)
{
- switch (type->sampler_dim)
+ switch (resource_type)
{
- case HLSL_SAMPLER_DIM_1D:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_1D:
return VKD3D_SM4_RESOURCE_TEXTURE_1D;
- case HLSL_SAMPLER_DIM_2D:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_2D:
return VKD3D_SM4_RESOURCE_TEXTURE_2D;
- case HLSL_SAMPLER_DIM_3D:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_3D:
return VKD3D_SM4_RESOURCE_TEXTURE_3D;
- case HLSL_SAMPLER_DIM_CUBE:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE:
return VKD3D_SM4_RESOURCE_TEXTURE_CUBE;
- case HLSL_SAMPLER_DIM_1DARRAY:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY:
return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY;
- case HLSL_SAMPLER_DIM_2DARRAY:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY:
return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY;
- case HLSL_SAMPLER_DIM_2DMS:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_2DMS:
return VKD3D_SM4_RESOURCE_TEXTURE_2DMS;
- case HLSL_SAMPLER_DIM_2DMSARRAY:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY:
return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY;
- case HLSL_SAMPLER_DIM_CUBEARRAY:
+ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY:
return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY;
- case HLSL_SAMPLER_DIM_BUFFER:
- case HLSL_SAMPLER_DIM_RAW_BUFFER:
- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
+ case VKD3D_SHADER_RESOURCE_BUFFER:
return VKD3D_SM4_RESOURCE_BUFFER;
default:
vkd3d_unreachable();
@@ -4096,297 +3352,6 @@ struct sm4_instruction
unsigned int idx_src_count;
};
-static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask,
- const struct hlsl_ir_node *instr)
-{
- VKD3D_ASSERT(instr->reg.allocated);
- reg->type = VKD3DSPR_TEMP;
- reg->dimension = VSIR_DIMENSION_VEC4;
- reg->idx[0].offset = instr->reg.id;
- reg->idx_count = 1;
- *writemask = instr->reg.writemask;
-}
-
-static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg,
- enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref,
- struct sm4_instruction *sm4_instr)
-{
- const struct hlsl_ir_var *var = deref->var;
- unsigned int offset_const_deref;
-
- reg->type = type;
- reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
- reg->dimension = VSIR_DIMENSION_VEC4;
-
- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
-
- if (!var->indexable)
- {
- offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
- reg->idx[0].offset += offset_const_deref / 4;
- reg->idx_count = 1;
- }
- else
- {
- offset_const_deref = deref->const_offset;
- reg->idx[1].offset = offset_const_deref / 4;
- reg->idx_count = 2;
-
- if (deref->rel_offset.node)
- {
- struct vkd3d_shader_src_param *idx_src;
- unsigned int idx_writemask;
-
- VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs));
- idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++];
- memset(idx_src, 0, sizeof(*idx_src));
-
- reg->idx[1].rel_addr = idx_src;
- sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node);
- VKD3D_ASSERT(idx_writemask != 0);
- idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask));
- }
- }
-
- *writemask = 0xf & (0xf << (offset_const_deref % 4));
- if (var->regs[HLSL_REGSET_NUMERIC].writemask)
- *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
-}
-
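/* Editor's note: in the removed helper above, the expression
 * 0xf & (0xf << (offset % 4)) keeps the writemask lanes at and above the
 * component offset within a register; e.g. offset % 4 == 2 gives
 * 0xf & 0x3c == 0xc, i.e. .zw. */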
-static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg,
- uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr)
-{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref);
- const struct hlsl_ir_var *var = deref->var;
- struct hlsl_ctx *ctx = tpf->ctx;
-
- if (var->is_uniform)
- {
- enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
-
- if (regset == HLSL_REGSET_TEXTURES)
- {
- reg->type = VKD3DSPR_RESOURCE;
- reg->dimension = VSIR_DIMENSION_VEC4;
- if (vkd3d_shader_ver_ge(version, 5, 1))
- {
- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
- reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
- reg->idx_count = 2;
- }
- else
- {
- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index;
- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
- reg->idx_count = 1;
- }
- VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES);
- *writemask = VKD3DSP_WRITEMASK_ALL;
- }
- else if (regset == HLSL_REGSET_UAVS)
- {
- reg->type = VKD3DSPR_UAV;
- reg->dimension = VSIR_DIMENSION_VEC4;
- if (vkd3d_shader_ver_ge(version, 5, 1))
- {
- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
- reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
- reg->idx_count = 2;
- }
- else
- {
- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index;
- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
- reg->idx_count = 1;
- }
- VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
- *writemask = VKD3DSP_WRITEMASK_ALL;
- }
- else if (regset == HLSL_REGSET_SAMPLERS)
- {
- reg->type = VKD3DSPR_SAMPLER;
- reg->dimension = VSIR_DIMENSION_NONE;
- if (vkd3d_shader_ver_ge(version, 5, 1))
- {
- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
- reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
- reg->idx_count = 2;
- }
- else
- {
- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index;
- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
- reg->idx_count = 1;
- }
- VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS);
- *writemask = VKD3DSP_WRITEMASK_ALL;
- }
- else
- {
- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
-
- VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
- reg->type = VKD3DSPR_CONSTBUFFER;
- reg->dimension = VSIR_DIMENSION_VEC4;
- if (vkd3d_shader_ver_ge(version, 5, 1))
- {
- reg->idx[0].offset = var->buffer->reg.id;
- reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
- reg->idx[2].offset = offset / 4;
- reg->idx_count = 3;
- }
- else
- {
- reg->idx[0].offset = var->buffer->reg.index;
- reg->idx[1].offset = offset / 4;
- reg->idx_count = 2;
- }
- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
- }
- }
- else if (var->is_input_semantic)
- {
- bool has_idx;
-
- if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx))
- {
- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-
- if (has_idx)
- {
- reg->idx[0].offset = var->semantic.index + offset / 4;
- reg->idx_count = 1;
- }
-
- if (shader_sm4_is_scalar_register(reg))
- reg->dimension = VSIR_DIMENSION_SCALAR;
- else
- reg->dimension = VSIR_DIMENSION_VEC4;
- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
- }
- else
- {
- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
- VKD3D_ASSERT(hlsl_reg.allocated);
-
- if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
- reg->type = VKD3DSPR_PATCHCONST;
- else
- reg->type = VKD3DSPR_INPUT;
- reg->dimension = VSIR_DIMENSION_VEC4;
- reg->idx[0].offset = hlsl_reg.id;
- reg->idx_count = 1;
- *writemask = hlsl_reg.writemask;
- }
- }
- else if (var->is_output_semantic)
- {
- bool has_idx;
-
- if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx))
- {
- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-
- if (has_idx)
- {
- reg->idx[0].offset = var->semantic.index + offset / 4;
- reg->idx_count = 1;
- }
-
- if (shader_sm4_is_scalar_register(reg))
- reg->dimension = VSIR_DIMENSION_SCALAR;
- else
- reg->dimension = VSIR_DIMENSION_VEC4;
- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
- }
- else
- {
- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-
- VKD3D_ASSERT(hlsl_reg.allocated);
- reg->type = VKD3DSPR_OUTPUT;
- reg->dimension = VSIR_DIMENSION_VEC4;
- reg->idx[0].offset = hlsl_reg.id;
- reg->idx_count = 1;
- *writemask = hlsl_reg.writemask;
- }
- }
- else
- {
- enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
-
- sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr);
- }
-}
-
-static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src,
- const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr)
-{
- unsigned int hlsl_swizzle;
- uint32_t writemask;
-
- sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr);
- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4)
- {
- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
- src->swizzle = swizzle_from_sm4(hlsl_swizzle);
- }
-}
-
-static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr)
-{
- sm4_register_from_node(&dst->reg, &dst->write_mask, instr);
-}
-
-static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src,
- const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask)
-{
- src->swizzle = 0;
- src->reg.type = VKD3DSPR_IMMCONST;
- if (width == 1)
- {
- src->reg.dimension = VSIR_DIMENSION_SCALAR;
- src->reg.u.immconst_u32[0] = value->u[0].u;
- }
- else
- {
- unsigned int i, j = 0;
-
- src->reg.dimension = VSIR_DIMENSION_VEC4;
- for (i = 0; i < 4; ++i)
- {
- if ((map_writemask & (1u << i)) && (j < width))
- src->reg.u.immconst_u32[i] = value->u[j++].u;
- else
- src->reg.u.immconst_u32[i] = 0;
- }
- }
-}
-
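/* Editor's note: a worked example for the removed constant mapping above: a
 * two-component constant {a, b} with map_writemask .xz (0x5) produces the
 * immediate {a, 0, b, 0}; set lanes consume value components in order and
 * unset lanes are zero-filled. */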
-static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src,
- const struct hlsl_ir_node *instr, uint32_t map_writemask)
-{
- unsigned int hlsl_swizzle;
- uint32_t writemask;
-
- if (instr->type == HLSL_IR_CONSTANT)
- {
- struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
-
- sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask);
- return;
- }
-
- sm4_register_from_node(&src->reg, &writemask, instr);
- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4)
- {
- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
- src->swizzle = swizzle_from_sm4(hlsl_swizzle);
- }
-}
-
static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg,
unsigned int i)
{
@@ -4650,204 +3615,41 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s
sm4_update_stat_counters(tpf, instr);
}
-static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
- const struct hlsl_ir_node *texel_offset)
-{
- struct sm4_instruction_modifier modif;
- struct hlsl_ir_constant *offset;
-
- if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT)
- return false;
- offset = hlsl_ir_constant(texel_offset);
-
- modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI;
- modif.u.aoffimmi.u = offset->value.u[0].i;
- modif.u.aoffimmi.v = 0;
- modif.u.aoffimmi.w = 0;
- if (offset->node.data_type->dimx > 1)
- modif.u.aoffimmi.v = offset->value.u[1].i;
- if (offset->node.data_type->dimx > 2)
- modif.u.aoffimmi.w = offset->value.u[2].i;
- if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7
- || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7
- || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7)
- return false;
-
- instr->modifiers[instr->modifier_count++] = modif;
- return true;
-}
-
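
/*
 * Illustrative sketch, separate from the patch hunks: the range check behind
 * the removed encode_texel_offset_as_aoffimmi() above.  Each immediate texel
 * offset component is stored in a 4-bit signed field of the AOFFIMMI
 * modifier, so only values in [-8, 7] are encodable; anything else must be
 * rejected.  The helper below is invented for the example.
 */
#include <stdbool.h>

static bool aoffimmi_offset_is_encodable(int u, int v, int w)
{
    return u >= -8 && u <= 7
            && v >= -8 && v <= 7
            && w >= -8 && w <= 7;
}
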
-static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer)
+static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
{
- size_t size = (cbuffer->used_size + 3) / 4;
+ const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb;
+ size_t size = (cb->size + 3) / 4;
struct sm4_instruction instr =
{
.opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,
- .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4,
- .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER,
- .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE,
+ .srcs[0] = cb->src,
.src_count = 1,
};
- if (hlsl_version_ge(tpf->ctx, 5, 1))
+ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1))
{
- instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id;
- instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index;
- instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */
+ instr.srcs[0].reg.idx[0].offset = cb->src.reg.idx[0].offset;
+ instr.srcs[0].reg.idx[1].offset = cb->range.first;
+ instr.srcs[0].reg.idx[2].offset = cb->range.last;
instr.srcs[0].reg.idx_count = 3;
instr.idx[0] = size;
- instr.idx[1] = cbuffer->reg.space;
+ instr.idx[1] = cb->range.space;
instr.idx_count = 2;
}
else
{
- instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index;
+ instr.srcs[0].reg.idx[0].offset = cb->range.first;
instr.srcs[0].reg.idx[1].offset = size;
instr.srcs[0].reg.idx_count = 2;
}
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource)
-{
- unsigned int i;
- struct sm4_instruction instr =
- {
- .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
-
- .dsts[0].reg.type = VKD3DSPR_SAMPLER,
- .dst_count = 1,
- };
-
- VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS);
-
- if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON)
- instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT;
-
- for (i = 0; i < resource->bind_count; ++i)
- {
- if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
- continue;
-
- if (hlsl_version_ge(tpf->ctx, 5, 1))
- {
- VKD3D_ASSERT(!i);
- instr.dsts[0].reg.idx[0].offset = resource->id;
- instr.dsts[0].reg.idx[1].offset = resource->index;
- instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */
- instr.dsts[0].reg.idx_count = 3;
-
- instr.idx[0] = resource->space;
- instr.idx_count = 1;
- }
- else
- {
- instr.dsts[0].reg.idx[0].offset = resource->index + i;
- instr.dsts[0].reg.idx_count = 1;
- }
- write_sm4_instruction(tpf, &instr);
- }
-}
-
-static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource,
- bool uav)
-{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES;
- struct hlsl_type *component_type;
- struct sm4_instruction instr;
- bool multisampled;
- unsigned int i;
-
- VKD3D_ASSERT(resource->regset == regset);
-
- component_type = resource->component_type;
-
- for (i = 0; i < resource->bind_count; ++i)
- {
- if (resource->var && !resource->var->objects_usage[regset][i].used)
- continue;
-
- instr = (struct sm4_instruction)
- {
- .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE,
- .dsts[0].reg.idx[0].offset = resource->id + i,
- .dsts[0].reg.idx_count = 1,
- .dst_count = 1,
-
- .idx[0] = sm4_data_type(component_type) * 0x1111,
- .idx_count = 1,
- };
-
- multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
- || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY;
-
- if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count)
- {
- hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
- "Multisampled texture object declaration needs sample count for profile %u.%u.",
- version->major, version->minor);
- }
-
- if (vkd3d_shader_ver_ge(version, 5, 1))
- {
- VKD3D_ASSERT(!i);
- instr.dsts[0].reg.idx[0].offset = resource->id;
- instr.dsts[0].reg.idx[1].offset = resource->index;
- instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */
- instr.dsts[0].reg.idx_count = 3;
-
- instr.idx[1] = resource->space;
- instr.idx_count = 2;
- }
- else
- {
- instr.dsts[0].reg.idx[0].offset = resource->index + i;
- instr.dsts[0].reg.idx_count = 1;
- }
-
- if (uav)
- {
- switch (component_type->sampler_dim)
- {
- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
- instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED;
- instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4;
- break;
- case HLSL_SAMPLER_DIM_RAW_BUFFER:
- instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW;
- break;
- default:
- instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED;
- break;
- }
-
- if (component_type->e.resource.rasteriser_ordered)
- instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT;
- }
- else
- {
- switch (component_type->sampler_dim)
- {
- case HLSL_SAMPLER_DIM_RAW_BUFFER:
- instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW;
- break;
- default:
- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
- break;
- }
- }
- instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
-
- if (multisampled)
- instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT;
+ if (ins->flags & VKD3DSI_INDEXED_DYNAMIC)
+ instr.extra_bits |= VKD3D_SM4_INDEX_TYPE_MASK;
- write_sm4_instruction(tpf, &instr);
- }
+ write_sm4_instruction(tpf, &instr);
}
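
/*
 * Illustrative sketch, separate from the patch hunks: the two constant buffer
 * index layouts emitted by tpf_dcl_constant_buffer() above.  Shader model 5.1
 * declarations carry three register indices (the descriptor ID plus the first
 * and last registers of the bound range) and a separate register space token;
 * earlier models carry only the start register and the size in vec4 units.
 * The types and helper below are invented for the example.
 */
#include <stdbool.h>
#include <stdint.h>

struct example_cb_indices
{
    uint32_t idx[3];
    unsigned int idx_count;
};

static void encode_cb_indices(struct example_cb_indices *d, bool sm51,
        uint32_t id, uint32_t first, uint32_t last, uint32_t size_in_vec4s)
{
    if (sm51)
    {
        d->idx[0] = id;    /* Descriptor ID. */
        d->idx[1] = first; /* First register of the range. */
        d->idx[2] = last;  /* Last register of the range. */
        d->idx_count = 3;
    }
    else
    {
        d->idx[0] = first;         /* Start register. */
        d->idx[1] = size_in_vec4s; /* Buffer size in vec4 units. */
        d->idx_count = 2;
    }
}
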
static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
@@ -4924,42 +3726,116 @@ static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vs
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags)
+static void tpf_dcl_sampler(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
{
+ const struct vkd3d_shader_sampler *sampler = &ins->declaration.sampler;
struct sm4_instruction instr =
{
- .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,
- .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT,
+ .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
+ .extra_bits = ins->flags << VKD3D_SM4_SAMPLER_MODE_SHIFT,
+
+ .dsts[0].reg.type = VKD3DSPR_SAMPLER,
+ .dst_count = 1,
};
+ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1))
+ {
+ instr.dsts[0].reg.idx[0].offset = sampler->src.reg.idx[0].offset;
+ instr.dsts[0].reg.idx[1].offset = sampler->range.first;
+ instr.dsts[0].reg.idx[2].offset = sampler->range.last;
+ instr.dsts[0].reg.idx_count = 3;
+
+ instr.idx[0] = ins->declaration.sampler.range.space;
+ instr.idx_count = 1;
+ }
+ else
+ {
+ instr.dsts[0].reg.idx[0].offset = sampler->range.first;
+ instr.dsts[0].reg.idx_count = 1;
+ }
+
write_sm4_instruction(tpf, &instr);
}
-static void tpf_write_hs_decls(const struct tpf_compiler *tpf)
+static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
{
- struct sm4_instruction instr =
+ const struct vkd3d_shader_structured_resource *structured_resource = &ins->declaration.structured_resource;
+ const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic;
+ const struct vkd3d_shader_version *version = &tpf->program->shader_version;
+ const struct vkd3d_sm4_opcode_info *info;
+ struct sm4_instruction instr = {0};
+ unsigned int i, k;
+ bool uav;
+
+ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode);
+ VKD3D_ASSERT(info);
+
+ uav = ins->opcode == VKD3DSIH_DCL_UAV_TYPED
+ || ins->opcode == VKD3DSIH_DCL_UAV_RAW
+ || ins->opcode == VKD3DSIH_DCL_UAV_STRUCTURED;
+
+ instr.opcode = info->opcode;
+
+ instr.dsts[0] = semantic->resource.reg;
+ instr.dst_count = 1;
+
+ for (k = 0; k < 4; ++k)
{
- .opcode = VKD3D_SM5_OP_HS_DECLS,
- };
+ for (i = ARRAY_SIZE(data_type_table) - 1; i < ARRAY_SIZE(data_type_table); --i)
+ {
+ if (semantic->resource_data_type[k] == data_type_table[i])
+ {
+ instr.idx[0] |= i << (4 * k);
+ break;
+ }
+ }
+ }
+ instr.idx_count = 1;
+
+ if (vkd3d_shader_ver_ge(version, 5, 1))
+ {
+ instr.dsts[0].reg.idx[0].offset = semantic->resource.reg.reg.idx[0].offset;
+ instr.dsts[0].reg.idx[1].offset = semantic->resource.range.first;
+ instr.dsts[0].reg.idx[2].offset = semantic->resource.range.last;
+ instr.dsts[0].reg.idx_count = 3;
+
+ instr.idx[1] = semantic->resource.range.space;
+ instr.idx_count = 2;
+ }
+ else
+ {
+ instr.dsts[0].reg.idx[0].offset = semantic->resource.range.first;
+ instr.dsts[0].reg.idx_count = 1;
+ }
+
+ if (uav)
+ instr.extra_bits |= ins->flags << VKD3D_SM5_UAV_FLAGS_SHIFT;
+
+ instr.extra_bits |= (sm4_resource_dimension(ins->resource_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
+ instr.extra_bits |= semantic->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT;
+
+ if (ins->structured)
+ instr.byte_stride = structured_resource->byte_stride;
write_sm4_instruction(tpf, &instr);
}
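
/*
 * Illustrative sketch, separate from the patch hunks: the return type token
 * built by tpf_dcl_texture() above.  Each of the four components stores a
 * 4-bit data type index, packed one nibble per component from bit 0 upwards.
 * The helper below is invented for the example.
 */
#include <stdint.h>

static uint32_t pack_resource_data_types(const unsigned int types[4])
{
    uint32_t token = 0;
    unsigned int k;

    for (k = 0; k < 4; ++k)
        token |= (types[k] & 0xf) << (4 * k); /* One nibble per component. */

    return token;
}
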
-static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf)
+static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags)
{
struct sm4_instruction instr =
{
- .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE,
+ .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,
+ .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT,
};
write_sm4_instruction(tpf, &instr);
}
-static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf)
+static void tpf_write_hs_decls(const struct tpf_compiler *tpf)
{
struct sm4_instruction instr =
{
- .opcode = VKD3D_SM5_OP_HS_FORK_PHASE,
+ .opcode = VKD3D_SM5_OP_HS_DECLS,
};
write_sm4_instruction(tpf, &instr);
@@ -5022,594 +3898,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_ret(const struct tpf_compiler *tpf)
-{
- struct sm4_instruction instr =
- {
- .opcode = VKD3D_SM4_OP_RET,
- };
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords,
- const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset,
- enum hlsl_sampler_dim dim)
-{
- const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource);
- bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE
- && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
- bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS);
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER;
- unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL;
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- if (uav)
- instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED;
- else if (raw)
- instr.opcode = VKD3D_SM5_OP_LD_RAW;
- else
- instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD;
-
- if (texel_offset)
- {
- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
- {
- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
- "Offset must resolve to integer literal in the range -8 to 7.");
- return;
- }
- }
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- if (!uav)
- {
- /* Mipmap level is in the last component in the IR, but needs to be in the W
- * component in the instruction. */
- unsigned int dim_count = hlsl_sampler_dim_count(dim);
-
- if (dim_count == 1)
- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3;
- if (dim_count == 2)
- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3;
- }
-
- sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask);
-
- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
-
- instr.src_count = 2;
-
- if (multisampled)
- {
- if (sample_index->type == HLSL_IR_CONSTANT)
- {
- struct vkd3d_shader_register *reg = &instr.srcs[2].reg;
- struct hlsl_ir_constant *index;
-
- index = hlsl_ir_constant(sample_index);
-
- memset(&instr.srcs[2], 0, sizeof(instr.srcs[2]));
- reg->type = VKD3DSPR_IMMCONST;
- reg->dimension = VSIR_DIMENSION_SCALAR;
- reg->u.immconst_u32[0] = index->value.u[0].u;
- }
- else if (version->major == 4 && version->minor == 0)
- {
- hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index.");
- }
- else
- {
- sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0);
- }
-
- ++instr.src_count;
- }
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-{
- const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
- const struct hlsl_ir_node *coords = load->coords.node;
- const struct hlsl_deref *resource = &load->resource;
- const struct hlsl_deref *sampler = &load->sampler;
- const struct hlsl_ir_node *dst = &load->node;
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- switch (load->load_type)
- {
- case HLSL_RESOURCE_SAMPLE:
- instr.opcode = VKD3D_SM4_OP_SAMPLE;
- break;
-
- case HLSL_RESOURCE_SAMPLE_CMP:
- instr.opcode = VKD3D_SM4_OP_SAMPLE_C;
- break;
-
- case HLSL_RESOURCE_SAMPLE_CMP_LZ:
- instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ;
- break;
-
- case HLSL_RESOURCE_SAMPLE_LOD:
- instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD;
- break;
-
- case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
- instr.opcode = VKD3D_SM4_OP_SAMPLE_B;
- break;
-
- case HLSL_RESOURCE_SAMPLE_GRAD:
- instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD;
- break;
-
- default:
- vkd3d_unreachable();
- }
-
- if (texel_offset)
- {
- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
- {
- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
- "Offset must resolve to integer literal in the range -8 to 7.");
- return;
- }
- }
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
- sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr);
- instr.src_count = 3;
-
- if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD
- || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS)
- {
- sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL);
- ++instr.src_count;
- }
- else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
- {
- sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL);
- sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL);
- instr.src_count += 2;
- }
- else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP
- || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ)
- {
- sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL);
- ++instr.src_count;
- }
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-{
- const struct hlsl_deref *resource = &load->resource;
- const struct hlsl_ir_node *dst = &load->node;
- struct sm4_instruction instr;
-
- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO;
- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr);
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-{
- const struct hlsl_deref *resource = &load->resource;
- const struct hlsl_ir_node *dst = &load->node;
- struct sm4_instruction instr;
-
- if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER
- || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
- {
- hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers.");
- return;
- }
-
- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_RESINFO;
- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
- instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL);
- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
- instr.src_count = 2;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
-{
- struct sm4_instruction instr =
- {
- .opcode = VKD3D_SM4_OP_IF,
- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ,
- .src_count = 1,
- };
-
- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1);
-
- sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL);
- write_sm4_instruction(tpf, &instr);
-
- write_sm4_block(tpf, &iff->then_block);
-
- if (!list_empty(&iff->else_block.instrs))
- {
- instr.opcode = VKD3D_SM4_OP_ELSE;
- instr.src_count = 0;
- write_sm4_instruction(tpf, &instr);
-
- write_sm4_block(tpf, &iff->else_block);
- }
-
- instr.opcode = VKD3D_SM4_OP_ENDIF;
- instr.src_count = 0;
- write_sm4_instruction(tpf, &instr);
-}
-
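
/*
 * Illustrative sketch, separate from the patch hunks: the token stream shape
 * that the removed write_sm4_if() produced.  SM4 control flow is structured;
 * each branch is bracketed by explicit IF/ELSE/ENDIF opcodes rather than by
 * jump targets.  The helpers below simply print the shape and are invented
 * for the example.
 */
#include <stdbool.h>
#include <stdio.h>

static void emit(const char *token)
{
    puts(token);
}

static void emit_if_construct(bool has_else)
{
    emit("if_nz r0.x"); /* The condition is tested for "not zero". */
    emit("...");        /* Then-block instructions. */
    if (has_else)
    {
        emit("else");
        emit("..."); /* Else-block instructions. */
    }
    emit("endif");
}
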
-static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump)
-{
- struct sm4_instruction instr = {0};
-
- switch (jump->type)
- {
- case HLSL_IR_JUMP_BREAK:
- instr.opcode = VKD3D_SM4_OP_BREAK;
- break;
-
- case HLSL_IR_JUMP_CONTINUE:
- instr.opcode = VKD3D_SM4_OP_CONTINUE;
- break;
-
- case HLSL_IR_JUMP_DISCARD_NZ:
- {
- instr.opcode = VKD3D_SM4_OP_DISCARD;
- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
-
- memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
- instr.src_count = 1;
- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL);
- break;
- }
-
- case HLSL_IR_JUMP_RETURN:
- vkd3d_unreachable();
-
- default:
- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
- return;
- }
-
- write_sm4_instruction(tpf, &instr);
-}
-
-/* Does this variable's data come directly from the API user, rather than being
- * temporary or from a previous shader stage?
- * I.e. is it a uniform or VS input? */
-static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var)
-{
- if (var->is_uniform)
- return true;
-
- return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX;
-}
-
-static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load)
-{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- const struct hlsl_type *type = load->node.data_type;
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
-
- sm4_dst_from_node(&instr.dsts[0], &load->node);
- instr.dst_count = 1;
-
- VKD3D_ASSERT(hlsl_is_numeric_type(type));
- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var))
- {
- struct hlsl_constant_value value;
-
- /* Uniform bools can be specified as anything, but internal bools always
- * have 0 for false and ~0 for true. Normalize that here. */
-
- instr.opcode = VKD3D_SM4_OP_MOVC;
-
- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
-
- memset(&value, 0xff, sizeof(value));
- sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask);
- memset(&value, 0, sizeof(value));
- sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask);
- instr.src_count = 3;
- }
- else
- {
- instr.opcode = VKD3D_SM4_OP_MOV;
-
- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
- instr.src_count = 1;
- }
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop)
-{
- struct sm4_instruction instr =
- {
- .opcode = VKD3D_SM4_OP_LOOP,
- };
-
- write_sm4_instruction(tpf, &instr);
-
- write_sm4_block(tpf, &loop->body);
-
- instr.opcode = VKD3D_SM4_OP_ENDLOOP;
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
- const struct hlsl_deref *resource, const struct hlsl_deref *sampler,
- const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset)
-{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- struct vkd3d_shader_src_param *src;
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
-
- instr.opcode = VKD3D_SM4_OP_GATHER4;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL);
-
- if (texel_offset)
- {
- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
- {
- if (!vkd3d_shader_ver_ge(version, 5, 0))
- {
- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
- "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
- return;
- }
- instr.opcode = VKD3D_SM5_OP_GATHER4_PO;
- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL);
- }
- }
-
- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr);
-
- src = &instr.srcs[instr.src_count++];
- sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr);
- src->reg.dimension = VSIR_DIMENSION_VEC4;
- src->swizzle = swizzle;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-{
- const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
- const struct hlsl_ir_node *sample_index = load->sample_index.node;
- const struct hlsl_ir_node *coords = load->coords.node;
-
- if (load->sampler.var && !load->sampler.var->is_uniform)
- {
- hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable.");
- return;
- }
-
- if (!load->resource.var->is_uniform)
- {
- hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable.");
- return;
- }
-
- switch (load->load_type)
- {
- case HLSL_RESOURCE_LOAD:
- write_sm4_ld(tpf, &load->node, &load->resource,
- coords, sample_index, texel_offset, load->sampling_dim);
- break;
-
- case HLSL_RESOURCE_SAMPLE:
- case HLSL_RESOURCE_SAMPLE_CMP:
- case HLSL_RESOURCE_SAMPLE_CMP_LZ:
- case HLSL_RESOURCE_SAMPLE_LOD:
- case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
- case HLSL_RESOURCE_SAMPLE_GRAD:
- /* Combined sample expressions were lowered. */
- VKD3D_ASSERT(load->sampler.var);
- write_sm4_sample(tpf, load);
- break;
-
- case HLSL_RESOURCE_GATHER_RED:
- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
- VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset);
- break;
-
- case HLSL_RESOURCE_GATHER_GREEN:
- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
- VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset);
- break;
-
- case HLSL_RESOURCE_GATHER_BLUE:
- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
- VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset);
- break;
-
- case HLSL_RESOURCE_GATHER_ALPHA:
- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords,
- VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset);
- break;
-
- case HLSL_RESOURCE_SAMPLE_INFO:
- write_sm4_sampleinfo(tpf, load);
- break;
-
- case HLSL_RESOURCE_RESINFO:
- write_sm4_resinfo(tpf, load);
- break;
-
- case HLSL_RESOURCE_SAMPLE_PROJ:
- vkd3d_unreachable();
- }
-}
-
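
/*
 * Illustrative sketch, separate from the patch hunks: gather4 reads a single
 * channel from each of four texels, so the removed code above selects the
 * channel by broadcasting one swizzle component (XXXX, YYYY, ZZZZ or WWWW)
 * on the sampler operand.  The 2-bit packing below is invented for the
 * example.
 */
#include <stdint.h>

enum example_gather_channel
{
    EXAMPLE_GATHER_RED,
    EXAMPLE_GATHER_GREEN,
    EXAMPLE_GATHER_BLUE,
    EXAMPLE_GATHER_ALPHA,
};

static uint32_t gather_broadcast_swizzle(enum example_gather_channel channel)
{
    uint32_t c = (uint32_t)channel & 0x3;

    /* Replicate the same component selector into all four positions. */
    return c | (c << 2) | (c << 4) | (c << 6);
}
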
-static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store)
-{
- struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource);
- struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node;
- struct sm4_instruction instr;
-
- if (!store->resource.var->is_uniform)
- {
- hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable.");
- return;
- }
-
- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
- {
- hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented.");
- return;
- }
-
- memset(&instr, 0, sizeof(instr));
-
- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr);
- instr.dst_count = 1;
- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
- {
- instr.opcode = VKD3D_SM5_OP_STORE_RAW;
- instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx);
- }
- else
- {
- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- }
-
- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL);
- instr.src_count = 2;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store)
-{
- const struct hlsl_ir_node *rhs = store->rhs.node;
- struct sm4_instruction instr;
- uint32_t writemask;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_MOV;
-
- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr);
- instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask);
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s)
-{
- const struct hlsl_ir_node *selector = s->selector.node;
- struct hlsl_ir_switch_case *c;
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_SWITCH;
-
- sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL);
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-
- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
- {
- memset(&instr, 0, sizeof(instr));
- if (c->is_default)
- {
- instr.opcode = VKD3D_SM4_OP_DEFAULT;
- }
- else
- {
- struct hlsl_constant_value value = { .u[0].u = c->value };
-
- instr.opcode = VKD3D_SM4_OP_CASE;
- sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL);
- instr.src_count = 1;
- }
-
- write_sm4_instruction(tpf, &instr);
- write_sm4_block(tpf, &c->body);
- }
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_ENDSWITCH;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle)
-{
- unsigned int hlsl_swizzle;
- struct sm4_instruction instr;
- uint32_t writemask;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_MOV;
-
- sm4_dst_from_node(&instr.dsts[0], &swizzle->node);
- instr.dst_count = 1;
-
- sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node);
- hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask),
- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask);
- instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle);
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
+static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
{
+ struct sm4_instruction_modifier *modifier;
const struct vkd3d_sm4_opcode_info *info;
struct sm4_instruction instr = {0};
unsigned int dst_count, src_count;
@@ -5655,6 +3946,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_
for (unsigned int i = 0; i < ins->src_count; ++i)
instr.srcs[i] = ins->src[i];
+ if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w)
+ {
+ VKD3D_ASSERT(instr.modifier_count < ARRAY_SIZE(instr.modifiers));
+ modifier = &instr.modifiers[instr.modifier_count++];
+ modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI;
+ modifier->u.aoffimmi.u = ins->texel_offset.u;
+ modifier->u.aoffimmi.v = ins->texel_offset.v;
+ modifier->u.aoffimmi.w = ins->texel_offset.w;
+ }
+
+ if (info->is_conditional_op)
+ {
+ if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ)
+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
+ }
+
write_sm4_instruction(tpf, &instr);
}
@@ -5662,6 +3969,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
{
switch (ins->opcode)
{
+ case VKD3DSIH_DCL_CONSTANT_BUFFER:
+ tpf_dcl_constant_buffer(tpf, ins);
+ break;
+
case VKD3DSIH_DCL_TEMPS:
tpf_dcl_temps(tpf, ins->declaration.count);
break;
@@ -5702,8 +4013,34 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0);
break;
+ case VKD3DSIH_DCL_SAMPLER:
+ tpf_dcl_sampler(tpf, ins);
+ break;
+
+ case VKD3DSIH_DCL:
+ case VKD3DSIH_DCL_RESOURCE_RAW:
+ case VKD3DSIH_DCL_UAV_RAW:
+ case VKD3DSIH_DCL_UAV_STRUCTURED:
+ case VKD3DSIH_DCL_UAV_TYPED:
+ tpf_dcl_texture(tpf, ins);
+ break;
+
case VKD3DSIH_ADD:
+ case VKD3DSIH_ATOMIC_AND:
+ case VKD3DSIH_ATOMIC_CMP_STORE:
+ case VKD3DSIH_ATOMIC_IADD:
+ case VKD3DSIH_ATOMIC_IMAX:
+ case VKD3DSIH_ATOMIC_IMIN:
+ case VKD3DSIH_ATOMIC_UMAX:
+ case VKD3DSIH_ATOMIC_UMIN:
+ case VKD3DSIH_ATOMIC_OR:
+ case VKD3DSIH_ATOMIC_XOR:
case VKD3DSIH_AND:
+ case VKD3DSIH_BREAK:
+ case VKD3DSIH_CASE:
+ case VKD3DSIH_CONTINUE:
+ case VKD3DSIH_DEFAULT:
+ case VKD3DSIH_DISCARD:
case VKD3DSIH_DIV:
case VKD3DSIH_DP2:
case VKD3DSIH_DP3:
@@ -5714,6 +4051,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_DSY:
case VKD3DSIH_DSY_COARSE:
case VKD3DSIH_DSY_FINE:
+ case VKD3DSIH_ELSE:
+ case VKD3DSIH_ENDIF:
+ case VKD3DSIH_ENDLOOP:
+ case VKD3DSIH_ENDSWITCH:
case VKD3DSIH_EQO:
case VKD3DSIH_EXP:
case VKD3DSIH_F16TOF32:
@@ -5721,21 +4062,43 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_FRC:
case VKD3DSIH_FTOI:
case VKD3DSIH_FTOU:
+ case VKD3DSIH_GATHER4:
+ case VKD3DSIH_GATHER4_PO:
+ case VKD3DSIH_GATHER4_C:
+ case VKD3DSIH_GATHER4_PO_C:
case VKD3DSIH_GEO:
+ case VKD3DSIH_HS_CONTROL_POINT_PHASE:
+ case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_IADD:
case VKD3DSIH_IEQ:
+ case VKD3DSIH_IF:
case VKD3DSIH_IGE:
case VKD3DSIH_ILT:
case VKD3DSIH_IMAD:
case VKD3DSIH_IMAX:
case VKD3DSIH_IMIN:
+ case VKD3DSIH_IMM_ATOMIC_AND:
+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH:
+ case VKD3DSIH_IMM_ATOMIC_EXCH:
+ case VKD3DSIH_IMM_ATOMIC_IADD:
+ case VKD3DSIH_IMM_ATOMIC_IMAX:
+ case VKD3DSIH_IMM_ATOMIC_IMIN:
+ case VKD3DSIH_IMM_ATOMIC_UMAX:
+ case VKD3DSIH_IMM_ATOMIC_UMIN:
+ case VKD3DSIH_IMM_ATOMIC_OR:
+ case VKD3DSIH_IMM_ATOMIC_XOR:
case VKD3DSIH_IMUL:
case VKD3DSIH_INE:
case VKD3DSIH_INEG:
case VKD3DSIH_ISHL:
case VKD3DSIH_ISHR:
case VKD3DSIH_ITOF:
+ case VKD3DSIH_LD:
+ case VKD3DSIH_LD2DMS:
+ case VKD3DSIH_LD_RAW:
+ case VKD3DSIH_LD_UAV_TYPED:
case VKD3DSIH_LOG:
+ case VKD3DSIH_LOOP:
case VKD3DSIH_LTO:
case VKD3DSIH_MAD:
case VKD3DSIH_MAX:
@@ -5747,14 +4110,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_NOT:
case VKD3DSIH_OR:
case VKD3DSIH_RCP:
+ case VKD3DSIH_RESINFO:
+ case VKD3DSIH_RET:
case VKD3DSIH_ROUND_NE:
case VKD3DSIH_ROUND_NI:
case VKD3DSIH_ROUND_PI:
case VKD3DSIH_ROUND_Z:
case VKD3DSIH_RSQ:
+ case VKD3DSIH_SAMPLE:
+ case VKD3DSIH_SAMPLE_B:
+ case VKD3DSIH_SAMPLE_C:
+ case VKD3DSIH_SAMPLE_C_LZ:
+ case VKD3DSIH_SAMPLE_GRAD:
case VKD3DSIH_SAMPLE_INFO:
+ case VKD3DSIH_SAMPLE_LOD:
case VKD3DSIH_SINCOS:
case VKD3DSIH_SQRT:
+ case VKD3DSIH_STORE_RAW:
+ case VKD3DSIH_STORE_UAV_TYPED:
+ case VKD3DSIH_SWITCH:
case VKD3DSIH_UDIV:
case VKD3DSIH_UGE:
case VKD3DSIH_ULT:
@@ -5772,102 +4146,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
}
}
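
/*
 * Illustrative sketch, separate from the patch hunks: the dispatch shape of
 * tpf_handle_instruction() above.  A handful of declaration opcodes get
 * dedicated handlers, while the long run of plain ALU and resource opcodes
 * shares a single generic encoder.  The names below are invented for the
 * example.
 */
#include <stdio.h>

enum example_opcode
{
    EXAMPLE_OP_DCL_SAMPLER,
    EXAMPLE_OP_ADD,
    EXAMPLE_OP_MUL,
    EXAMPLE_OP_RET,
};

static void handle_instruction(enum example_opcode op)
{
    switch (op)
    {
        case EXAMPLE_OP_DCL_SAMPLER:
            puts("declaration handler"); /* Dedicated per-opcode path. */
            break;

        case EXAMPLE_OP_ADD:
        case EXAMPLE_OP_MUL:
        case EXAMPLE_OP_RET:
            puts("generic encoder"); /* One path for simple instructions. */
            break;
    }
}
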
-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block)
+static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program)
{
- const struct hlsl_ir_node *instr;
- unsigned int vsir_instr_idx;
-
- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
- {
- if (instr->data_type)
- {
- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
- {
- hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.",
- instr->data_type->class);
- break;
- }
-
- if (!instr->reg.allocated)
- {
- VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT);
- continue;
- }
- }
-
- switch (instr->type)
- {
- case HLSL_IR_CALL:
- case HLSL_IR_CONSTANT:
- vkd3d_unreachable();
-
- case HLSL_IR_IF:
- write_sm4_if(tpf, hlsl_ir_if(instr));
- break;
-
- case HLSL_IR_JUMP:
- write_sm4_jump(tpf, hlsl_ir_jump(instr));
- break;
-
- case HLSL_IR_LOAD:
- write_sm4_load(tpf, hlsl_ir_load(instr));
- break;
-
- case HLSL_IR_RESOURCE_LOAD:
- write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr));
- break;
-
- case HLSL_IR_RESOURCE_STORE:
- write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr));
- break;
-
- case HLSL_IR_LOOP:
- write_sm4_loop(tpf, hlsl_ir_loop(instr));
- break;
-
- case HLSL_IR_STORE:
- write_sm4_store(tpf, hlsl_ir_store(instr));
- break;
-
- case HLSL_IR_SWITCH:
- write_sm4_switch(tpf, hlsl_ir_switch(instr));
- break;
-
- case HLSL_IR_SWIZZLE:
- write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr));
- break;
-
- case HLSL_IR_VSIR_INSTRUCTION_REF:
- vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx;
- tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
- }
- }
-}
+ unsigned int i;
-static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func)
-{
if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE)
tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size);
- write_sm4_block(tpf, &func->body);
-
- write_sm4_ret(tpf);
+ for (i = 0; i < program->instructions.count; ++i)
+ tpf_handle_instruction(tpf, &program->instructions.elements[i]);
}
-static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func)
+static void tpf_write_shdr(struct tpf_compiler *tpf)
{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
+ const struct vsir_program *program = tpf->program;
+ const struct vkd3d_shader_version *version;
struct vkd3d_bytecode_buffer buffer = {0};
- struct extern_resource *extern_resources;
- unsigned int extern_resources_count, i;
- const struct hlsl_buffer *cbuffer;
- struct hlsl_ctx *ctx = tpf->ctx;
size_t token_count_position;
- uint32_t global_flags = 0;
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
{
@@ -5884,101 +4179,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec
tpf->buffer = &buffer;
- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
-
+ version = &program->shader_version;
put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type]));
token_count_position = put_u32(&buffer, 0);
- if (version->major == 4)
- {
- for (i = 0; i < extern_resources_count; ++i)
- {
- const struct extern_resource *resource = &extern_resources[i];
- const struct hlsl_type *type = resource->component_type;
-
- if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
- {
- global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
- break;
- }
- }
- }
-
- if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
- global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
-
- if (global_flags)
- write_sm4_dcl_global_flags(tpf, global_flags);
+ if (program->global_flags)
+ write_sm4_dcl_global_flags(tpf, program->global_flags);
if (version->type == VKD3D_SHADER_TYPE_HULL)
{
tpf_write_hs_decls(tpf);
- tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */
- tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count);
- tpf_write_dcl_tessellator_domain(tpf, ctx->domain);
- tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning);
- tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive);
+ tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count);
+ tpf_write_dcl_output_control_point_count(tpf, program->output_control_point_count);
+ tpf_write_dcl_tessellator_domain(tpf, program->tess_domain);
+ tpf_write_dcl_tessellator_partitioning(tpf, program->tess_partitioning);
+ tpf_write_dcl_tessellator_output_primitive(tpf, program->tess_output_primitive);
}
else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
{
- tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */
- tpf_write_dcl_tessellator_domain(tpf, ctx->domain);
- }
-
- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
- {
- if (cbuffer->reg.allocated)
- write_sm4_dcl_constant_buffer(tpf, cbuffer);
- }
-
- for (i = 0; i < extern_resources_count; ++i)
- {
- const struct extern_resource *resource = &extern_resources[i];
-
- if (resource->regset == HLSL_REGSET_SAMPLERS)
- write_sm4_dcl_samplers(tpf, resource);
- else if (resource->regset == HLSL_REGSET_TEXTURES)
- write_sm4_dcl_textures(tpf, resource, false);
- else if (resource->regset == HLSL_REGSET_UAVS)
- write_sm4_dcl_textures(tpf, resource, true);
+ tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count);
+ tpf_write_dcl_tessellator_domain(tpf, program->tess_domain);
}
- if (version->type == VKD3D_SHADER_TYPE_HULL)
- tpf_write_hs_control_point_phase(tpf);
-
- tpf_write_shader_function(tpf, entry_func);
-
- if (version->type == VKD3D_SHADER_TYPE_HULL)
- {
- tpf_write_hs_fork_phase(tpf);
- tpf_write_shader_function(tpf, ctx->patch_constant_func);
- }
+ tpf_write_program(tpf, program);
set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
- add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer);
+ add_section(tpf, TAG_SHDR, &buffer);
tpf->buffer = NULL;
-
- sm4_free_extern_resources(extern_resources, extern_resources_count);
}
static void tpf_write_sfi0(struct tpf_compiler *tpf)
{
- struct extern_resource *extern_resources;
- unsigned int extern_resources_count;
- struct hlsl_ctx *ctx = tpf->ctx;
uint64_t *flags;
flags = vkd3d_calloc(1, sizeof(*flags));
- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
- for (unsigned int i = 0; i < extern_resources_count; ++i)
- {
- if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered)
- *flags |= VKD3D_SM4_REQUIRES_ROVS;
- }
- sm4_free_extern_resources(extern_resources, extern_resources_count);
+ if (tpf->program->features.rovs)
+ *flags |= DXBC_SFI0_REQUIRES_ROVS;
/* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE,
* STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */
@@ -5993,7 +4232,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf)
{
struct vkd3d_bytecode_buffer buffer = {0};
const struct sm4_stat *stat = tpf->stat;
- struct hlsl_ctx *ctx = tpf->ctx;
put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]);
put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]);
@@ -6025,7 +4263,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf)
put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]);
put_u32(&buffer, 0); /* Sample frequency */
- if (hlsl_version_ge(ctx, 5, 0))
+ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 0))
{
put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]);
put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]);
@@ -6037,15 +4275,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf)
put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]);
}
- add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer);
+ add_section(tpf, TAG_STAT, &buffer);
+}
+
+static void tpf_write_section(struct tpf_compiler *tpf, uint32_t tag, const struct vkd3d_shader_code *code)
+{
+ struct vkd3d_bytecode_buffer buffer = {0};
+
+ bytecode_put_bytes(&buffer, code->code, code->size);
+ add_section(tpf, tag, &buffer);
}
-/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving
- * data from the other parameters instead, so they can be removed from the
- * arguments and this function can be independent of HLSL structs. */
-int tpf_compile(struct vsir_program *program, uint64_t config_flags,
- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context,
- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
+int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef,
+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
{
enum vkd3d_shader_type shader_type = program->shader_version.type;
struct tpf_compiler tpf = {0};
@@ -6053,7 +4295,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags,
size_t i;
int ret;
- tpf.ctx = ctx;
tpf.program = program;
tpf.buffer = NULL;
tpf.stat = &stat;
@@ -6064,14 +4305,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags,
tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN);
if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN)
tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG);
- write_sm4_rdef(ctx, &tpf.dxbc);
- tpf_write_shdr(&tpf, entry_func);
+ tpf_write_section(&tpf, TAG_RDEF, rdef);
+ tpf_write_shdr(&tpf);
tpf_write_sfi0(&tpf);
tpf_write_stat(&tpf);
ret = VKD3D_OK;
- if (ctx->result)
- ret = ctx->result;
if (tpf.result)
ret = tpf.result;
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
index d751f2dc6bf..c7ad407f6fb 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
@@ -23,6 +23,8 @@
#include <stdio.h>
#include <math.h>
+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */
+
static inline int char_to_int(char c)
{
if ('0' <= c && c <= '9')
@@ -377,7 +379,8 @@ size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer)
return aligned_size;
}
- memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size);
+ if (aligned_size > buffer->size)
+ memset(&buffer->data[buffer->size], 0xab, aligned_size - buffer->size);
buffer->size = aligned_size;
return aligned_size;
}
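
/*
 * Illustrative sketch, separate from the patch hunks: the arithmetic behind
 * bytecode_align() above.  Sizes are rounded up to a 4-byte boundary, and
 * the patched code only writes the 0xab padding pattern when padding is
 * actually required.  The helper below is invented for the example.
 */
#include <stddef.h>
#include <string.h>

static size_t align_up_4(unsigned char *data, size_t size)
{
    size_t aligned_size = (size + 3) & ~(size_t)3;

    /* Fill the padding with a recognisable byte pattern, but only when the
     * size is not already aligned. */
    if (aligned_size > size)
        memset(&data[size], 0xab, aligned_size - size);

    return aligned_size;
}
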
@@ -394,7 +397,8 @@ size_t bytecode_put_bytes_unaligned(struct vkd3d_bytecode_buffer *buffer, const
buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
return offset;
}
- memcpy(buffer->data + offset, bytes, size);
+ if (size)
+ memcpy(&buffer->data[offset], bytes, size);
buffer->size = offset + size;
return offset;
}
@@ -805,6 +809,9 @@ struct vkd3d_shader_scan_context
struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info;
size_t combined_samplers_size;
+
+ enum vkd3d_shader_tessellator_output_primitive output_primitive;
+ enum vkd3d_shader_tessellator_partitioning partitioning;
};
static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context,
@@ -1262,6 +1269,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte
VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0,
instruction->declaration.structured_resource.byte_stride, false, instruction->flags);
break;
+ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE:
+ context->output_primitive = instruction->declaration.tessellator_output_primitive;
+ break;
+ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING:
+ context->partitioning = instruction->declaration.tessellator_partitioning;
+ break;
case VKD3DSIH_IF:
case VKD3DSIH_IFC:
cf_info = vkd3d_shader_scan_push_cf_info(context);
@@ -1502,6 +1515,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1)
{
struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info;
+ struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info;
struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0};
struct vkd3d_shader_scan_descriptor_info *descriptor_info;
struct vkd3d_shader_scan_signature_info *signature_info;
@@ -1530,6 +1544,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
descriptor_info1 = &local_descriptor_info1;
}
+ tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO);
+
vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info,
descriptor_info1, combined_sampler_info, message_context);
@@ -1573,6 +1589,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
if (!ret && descriptor_info)
ret = convert_descriptor_info(descriptor_info, descriptor_info1);
+ if (!ret && tessellation_info)
+ {
+ tessellation_info->output_primitive = context.output_primitive;
+ tessellation_info->partitioning = context.partitioning;
+ }
+
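
/*
 * Illustrative sketch, separate from the patch hunks: the chained structure
 * lookup used by vkd3d_find_struct() above.  Optional outputs such as the
 * tessellation info are linked into the compile info "next" chain and found
 * by structure type, in the style of Vulkan pNext chains.  The types below
 * are invented for the example.
 */
#include <stddef.h>

enum example_struct_type
{
    EXAMPLE_STRUCT_TESSELLATION_INFO = 1,
};

struct example_struct
{
    enum example_struct_type type;
    const void *next;
};

static void *example_find_struct(const struct example_struct *chain,
        enum example_struct_type type)
{
    while (chain)
    {
        if (chain->type == type)
            return (void *)chain;
        chain = chain->next;
    }

    return NULL;
}
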
if (ret < 0)
{
if (combined_sampler_info)
@@ -1959,7 +1981,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
static const enum vkd3d_shader_target_type dxbc_tpf_types[] =
{
VKD3D_SHADER_TARGET_SPIRV_BINARY,
-#ifdef HAVE_SPIRV_TOOLS
+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
VKD3D_SHADER_TARGET_SPIRV_TEXT,
#endif
VKD3D_SHADER_TARGET_D3D_ASM,
@@ -1974,7 +1996,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
static const enum vkd3d_shader_target_type hlsl_types[] =
{
VKD3D_SHADER_TARGET_SPIRV_BINARY,
-#ifdef HAVE_SPIRV_TOOLS
+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
VKD3D_SHADER_TARGET_SPIRV_TEXT,
#endif
VKD3D_SHADER_TARGET_D3D_ASM,
@@ -1986,7 +2008,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
static const enum vkd3d_shader_target_type d3dbc_types[] =
{
VKD3D_SHADER_TARGET_SPIRV_BINARY,
-#ifdef HAVE_SPIRV_TOOLS
+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
VKD3D_SHADER_TARGET_SPIRV_TEXT,
#endif
VKD3D_SHADER_TARGET_D3D_ASM,
@@ -1996,7 +2018,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
static const enum vkd3d_shader_target_type dxbc_dxil_types[] =
{
VKD3D_SHADER_TARGET_SPIRV_BINARY,
-# ifdef HAVE_SPIRV_TOOLS
+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
VKD3D_SHADER_TARGET_SPIRV_TEXT,
# endif
VKD3D_SHADER_TARGET_D3D_ASM,
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index be7c0b73a22..7e8ec156aad 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -62,6 +62,8 @@
#define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1)
#define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1)
+#define VKD3D_MAX_STREAM_COUNT 4
+
enum vkd3d_shader_error
{
VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1,
@@ -163,6 +165,7 @@ enum vkd3d_shader_error
VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037,
VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038,
VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039,
+ VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
@@ -247,6 +250,7 @@ enum vkd3d_shader_error
VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020,
VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021,
VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022,
+ VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE = 9023,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300,
@@ -591,28 +595,25 @@ enum vkd3d_shader_opcode
enum vkd3d_shader_register_type
{
- VKD3DSPR_TEMP = 0,
- VKD3DSPR_INPUT = 1,
- VKD3DSPR_CONST = 2,
- VKD3DSPR_ADDR = 3,
- VKD3DSPR_TEXTURE = 3,
- VKD3DSPR_RASTOUT = 4,
- VKD3DSPR_ATTROUT = 5,
- VKD3DSPR_TEXCRDOUT = 6,
- VKD3DSPR_OUTPUT = 6,
- VKD3DSPR_CONSTINT = 7,
- VKD3DSPR_COLOROUT = 8,
- VKD3DSPR_DEPTHOUT = 9,
- VKD3DSPR_COMBINED_SAMPLER = 10,
- VKD3DSPR_CONST2 = 11,
- VKD3DSPR_CONST3 = 12,
- VKD3DSPR_CONST4 = 13,
- VKD3DSPR_CONSTBOOL = 14,
- VKD3DSPR_LOOP = 15,
- VKD3DSPR_TEMPFLOAT16 = 16,
- VKD3DSPR_MISCTYPE = 17,
- VKD3DSPR_LABEL = 18,
- VKD3DSPR_PREDICATE = 19,
+ VKD3DSPR_TEMP,
+ VKD3DSPR_INPUT,
+ VKD3DSPR_CONST,
+ VKD3DSPR_ADDR,
+ VKD3DSPR_TEXTURE,
+ VKD3DSPR_RASTOUT,
+ VKD3DSPR_ATTROUT,
+ VKD3DSPR_TEXCRDOUT,
+ VKD3DSPR_OUTPUT,
+ VKD3DSPR_CONSTINT,
+ VKD3DSPR_COLOROUT,
+ VKD3DSPR_DEPTHOUT,
+ VKD3DSPR_COMBINED_SAMPLER,
+ VKD3DSPR_CONSTBOOL,
+ VKD3DSPR_LOOP,
+ VKD3DSPR_TEMPFLOAT16,
+ VKD3DSPR_MISCTYPE,
+ VKD3DSPR_LABEL,
+ VKD3DSPR_PREDICATE,
VKD3DSPR_IMMCONST,
VKD3DSPR_IMMCONST64,
VKD3DSPR_CONSTBUFFER,
@@ -1123,6 +1124,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature,
enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index);
void shader_signature_cleanup(struct shader_signature *signature);
+struct vsir_features
+{
+ /* The shader requires rasteriser-ordered views. */
+ bool rovs;
+};
+
struct dxbc_shader_desc
{
const uint32_t *byte_code;
@@ -1131,6 +1138,7 @@ struct dxbc_shader_desc
struct shader_signature input_signature;
struct shader_signature output_signature;
struct shader_signature patch_constant_signature;
+ struct vsir_features features;
};
struct vkd3d_shader_register_semantic
@@ -1400,9 +1408,10 @@ enum vsir_control_flow_type
enum vsir_normalisation_level
{
- VSIR_NOT_NORMALISED,
+ VSIR_NORMALISED_SM1,
+ VSIR_NORMALISED_SM4,
VSIR_NORMALISED_HULL_CONTROL_POINT_IO,
- VSIR_FULLY_NORMALISED_IO,
+ VSIR_NORMALISED_SM6,
};
struct vsir_program
@@ -1428,9 +1437,16 @@ struct vsir_program
bool use_vocp;
bool has_point_size;
bool has_point_coord;
+ bool has_fog;
uint8_t diffuse_written_mask;
enum vsir_control_flow_type cf_type;
enum vsir_normalisation_level normalisation_level;
+ enum vkd3d_tessellator_domain tess_domain;
+ enum vkd3d_shader_tessellator_partitioning tess_partitioning;
+ enum vkd3d_shader_tessellator_output_primitive tess_output_primitive;
+ uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)];
+
+ struct vsir_features features;
const char **block_names;
size_t block_name_count;
@@ -1643,6 +1659,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc,
int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_message_context *message_context, struct shader_signature *signature);
+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab,
+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
+
int glsl_compile(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info,
@@ -1661,6 +1681,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
struct vkd3d_shader_message_context *message_context);
+int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef,
+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
+
enum vkd3d_md5_variant
{
VKD3D_MD5_STANDARD,
@@ -1942,6 +1965,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain,
#define DXBC_MAX_SECTION_COUNT 7
+#define DXBC_SFI0_REQUIRES_DOUBLES 0x00000001u
+#define DXBC_SFI0_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002u
+#define DXBC_SFI0_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004u
+#define DXBC_SFI0_REQUIRES_64_UAVS 0x00000008u
+#define DXBC_SFI0_REQUIRES_MINIMUM_PRECISION 0x00000010u
+#define DXBC_SFI0_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020u
+#define DXBC_SFI0_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040u
+#define DXBC_SFI0_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080u
+#define DXBC_SFI0_REQUIRES_TILED_RESOURCES 0x00000100u
+#define DXBC_SFI0_REQUIRES_STENCIL_REF 0x00000200u
+#define DXBC_SFI0_REQUIRES_INNER_COVERAGE 0x00000400u
+#define DXBC_SFI0_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800u
+#define DXBC_SFI0_REQUIRES_ROVS 0x00001000u
+#define DXBC_SFI0_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000u
+
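
/*
 * Illustrative sketch, separate from the patch hunks: the DXBC_SFI0_* values
 * above form a 64-bit requirement bitmask stored in the DXBC SFI0 chunk.
 * Producers OR in one bit per feature the shader needs, and consumers test
 * bits individually.  The flag value below mirrors DXBC_SFI0_REQUIRES_ROVS
 * purely for the example.
 */
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_SFI0_REQUIRES_ROVS 0x00001000u

static bool sfi0_requires_rovs(uint64_t flags)
{
    return (flags & EXAMPLE_SFI0_REQUIRES_ROVS) != 0;
}
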
struct dxbc_writer
{
unsigned int section_count;
diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c
index a55a97f6f2f..ce0c3b9128f 100644
--- a/libs/vkd3d/libs/vkd3d/command.c
+++ b/libs/vkd3d/libs/vkd3d/command.c
@@ -327,9 +327,12 @@ static void *vkd3d_fence_worker_main(void *arg)
struct vkd3d_waiting_fence *old_fences, *cur_fences = NULL;
struct vkd3d_fence_worker *worker = arg;
unsigned int i;
+ bool timeline;
vkd3d_set_thread_name("vkd3d_fence");
+ timeline = worker->device->vk_info.KHR_timeline_semaphore;
+
for (;;)
{
vkd3d_mutex_lock(&worker->mutex);
@@ -357,7 +360,12 @@ static void *vkd3d_fence_worker_main(void *arg)
vkd3d_mutex_unlock(&worker->mutex);
for (i = 0; i < cur_fence_count; ++i)
- worker->wait_for_gpu_fence(worker, &cur_fences[i]);
+ {
+ if (timeline)
+ vkd3d_wait_for_gpu_timeline_semaphore(worker, &cur_fences[i]);
+ else
+ vkd3d_wait_for_gpu_fence(worker, &cur_fences[i]);
+ }
}
vkd3d_free(cur_fences);
@@ -379,9 +387,6 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
worker->fences = NULL;
worker->fences_size = 0;
- worker->wait_for_gpu_fence = device->vk_info.KHR_timeline_semaphore
- ? vkd3d_wait_for_gpu_timeline_semaphore : vkd3d_wait_for_gpu_fence;
-
vkd3d_mutex_init(&worker->mutex);
vkd3d_cond_init(&worker->cond);
@@ -399,6 +404,7 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
struct d3d12_device *device)
{
+ unsigned int i;
HRESULT hr;
TRACE("worker %p.\n", worker);
@@ -416,6 +422,9 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
vkd3d_mutex_destroy(&worker->mutex);
vkd3d_cond_destroy(&worker->cond);
+ for (i = 0; i < worker->fence_count; ++i)
+ d3d12_fence_decref(worker->fences[i].fence);
+
vkd3d_free(worker->fences);
return S_OK;
@@ -556,7 +565,8 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
fence->old_vk_fences[i] = VK_NULL_HANDLE;
}
- d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
+ if (!device->vk_info.KHR_timeline_semaphore)
+ d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
vkd3d_mutex_unlock(&fence->mutex);
@@ -1255,6 +1265,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint
return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore));
}
+static void vkd3d_vk_descriptor_pool_array_cleanup(struct vkd3d_vk_descriptor_pool_array *array)
+{
+ vkd3d_free(array->pools);
+}
+
+static void vkd3d_vk_descriptor_pool_array_init(struct vkd3d_vk_descriptor_pool_array *array)
+{
+ memset(array, 0, sizeof(*array));
+}
+
+static bool vkd3d_vk_descriptor_pool_array_push_array(struct vkd3d_vk_descriptor_pool_array *array,
+ const struct vkd3d_vk_descriptor_pool *pools, size_t count)
+{
+ if (!vkd3d_array_reserve((void **)&array->pools, &array->capacity, array->count + count, sizeof(*array->pools)))
+ return false;
+
+ memcpy(&array->pools[array->count], pools, count * sizeof(*pools));
+ array->count += count;
+
+ return true;
+}
+
+static bool vkd3d_vk_descriptor_pool_array_push(struct vkd3d_vk_descriptor_pool_array *array,
+ unsigned int descriptor_count, VkDescriptorPool vk_pool)
+{
+ struct vkd3d_vk_descriptor_pool pool =
+ {
+ .descriptor_count = descriptor_count,
+ .vk_pool = vk_pool,
+ };
+
+ return vkd3d_vk_descriptor_pool_array_push_array(array, &pool, 1);
+}
+
+static VkDescriptorPool vkd3d_vk_descriptor_pool_array_find(struct vkd3d_vk_descriptor_pool_array *array,
+ unsigned int *descriptor_count)
+{
+ VkDescriptorPool vk_pool;
+ size_t i;
+
+ for (i = 0; i < array->count; ++i)
+ {
+ if (array->pools[i].descriptor_count >= *descriptor_count)
+ {
+ *descriptor_count = array->pools[i].descriptor_count;
+ vk_pool = array->pools[i].vk_pool;
+ array->pools[i] = array->pools[--array->count];
+
+ return vk_pool;
+ }
+ }
+
+ return VK_NULL_HANDLE;
+}
+
+static void vkd3d_vk_descriptor_pool_array_destroy_pools(struct vkd3d_vk_descriptor_pool_array *array,
+ const struct d3d12_device *device)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ size_t i;
+
+ for (i = 0; i < array->count; ++i)
+ {
+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, array->pools[i].vk_pool, NULL));
+ }
+ array->count = 0;
+}
+
/* Command buffers */
static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list,
const char *message, ...)
@@ -1376,18 +1454,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat
return true;
}
-static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator,
- VkDescriptorPool pool)
-{
- if (!vkd3d_array_reserve((void **)&allocator->descriptor_pools, &allocator->descriptor_pools_size,
- allocator->descriptor_pool_count + 1, sizeof(*allocator->descriptor_pools)))
- return false;
-
- allocator->descriptor_pools[allocator->descriptor_pool_count++] = pool;
-
- return true;
-}
-
static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator,
struct vkd3d_view *view)
{
@@ -1426,37 +1492,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all
}
static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
- struct d3d12_command_allocator *allocator)
+ struct d3d12_command_allocator *allocator, enum vkd3d_shader_descriptor_type descriptor_type,
+ unsigned int descriptor_count, bool unbounded)
{
struct d3d12_device *device = allocator->device;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct VkDescriptorPoolCreateInfo pool_desc;
VkDevice vk_device = device->vk_device;
+ VkDescriptorPoolSize vk_pool_sizes[2];
+ unsigned int pool_size, pool_limit;
VkDescriptorPool vk_pool;
VkResult vr;
- if (allocator->free_descriptor_pool_count > 0)
- {
- vk_pool = allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1];
- allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1] = VK_NULL_HANDLE;
- --allocator->free_descriptor_pool_count;
- }
- else
+ if (!(vk_pool = vkd3d_vk_descriptor_pool_array_find(&allocator->free_descriptor_pools[descriptor_type],
+ &descriptor_count)))
{
+ pool_limit = device->vk_pool_limits[descriptor_type];
+
+ if (descriptor_count > pool_limit)
+ {
+ if (!unbounded)
+ {
+ ERR("Descriptor count %u exceeds maximum pool size %u.\n", descriptor_count, pool_limit);
+ return VK_NULL_HANDLE;
+ }
+
+ WARN("Clamping descriptor count %u to maximum pool size %u for unbounded allocation.\n",
+ descriptor_count, pool_limit);
+ descriptor_count = pool_limit;
+ }
+
+ pool_size = allocator->vk_pool_sizes[descriptor_type];
+ if (descriptor_count > pool_size)
+ {
+ pool_size = 1u << (vkd3d_log2i(descriptor_count - 1) + 1);
+ pool_size = min(pool_limit, pool_size);
+ }
+ descriptor_count = pool_size;
+
+ vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true);
+ vk_pool_sizes[0].descriptorCount = descriptor_count;
+
+ vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false);
+ vk_pool_sizes[1].descriptorCount = descriptor_count;
+
pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
pool_desc.pNext = NULL;
pool_desc.flags = 0;
pool_desc.maxSets = 512;
- pool_desc.poolSizeCount = device->vk_pool_count;
- pool_desc.pPoolSizes = device->vk_pool_sizes;
+ pool_desc.poolSizeCount = 1;
+ if (vk_pool_sizes[1].type != vk_pool_sizes[0].type)
+ ++pool_desc.poolSizeCount;
+ pool_desc.pPoolSizes = vk_pool_sizes;
+
if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0)
{
ERR("Failed to create descriptor pool, vr %d.\n", vr);
return VK_NULL_HANDLE;
}
+
+ if (!unbounded || descriptor_count < pool_limit)
+ allocator->vk_pool_sizes[descriptor_type] = min(pool_limit, descriptor_count * 2);
}
- if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool)))
+ if (!(vkd3d_vk_descriptor_pool_array_push(&allocator->descriptor_pools[descriptor_type],
+ descriptor_count, vk_pool)))
{
ERR("Failed to add descriptor pool.\n");
VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL));
@@ -1466,9 +1566,9 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
return vk_pool;
}
-static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(
- struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout,
- unsigned int variable_binding_size, bool unbounded)
+static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(struct d3d12_command_allocator *allocator,
+ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int descriptor_count,
+ VkDescriptorSetLayout vk_set_layout, unsigned int variable_binding_size, bool unbounded)
{
struct d3d12_device *device = allocator->device;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
@@ -1478,14 +1578,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(
VkDescriptorSet vk_descriptor_set;
VkResult vr;
- if (!allocator->vk_descriptor_pool)
- allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator);
- if (!allocator->vk_descriptor_pool)
+ if (!allocator->vk_descriptor_pools[descriptor_type])
+ allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator,
+ descriptor_type, descriptor_count, unbounded);
+ if (!allocator->vk_descriptor_pools[descriptor_type])
return VK_NULL_HANDLE;
set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
set_desc.pNext = NULL;
- set_desc.descriptorPool = allocator->vk_descriptor_pool;
+ set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type];
set_desc.descriptorSetCount = 1;
set_desc.pSetLayouts = &vk_set_layout;
if (unbounded)
@@ -1499,16 +1600,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(
if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0)
return vk_descriptor_set;
- allocator->vk_descriptor_pool = VK_NULL_HANDLE;
+ allocator->vk_descriptor_pools[descriptor_type] = VK_NULL_HANDLE;
if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR)
- allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator);
- if (!allocator->vk_descriptor_pool)
+ allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator,
+ descriptor_type, descriptor_count, unbounded);
+ if (!allocator->vk_descriptor_pools[descriptor_type])
{
ERR("Failed to allocate descriptor set, vr %d.\n", vr);
return VK_NULL_HANDLE;
}
- set_desc.descriptorPool = allocator->vk_descriptor_pool;
+ set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type];
if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0)
{
FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr);
@@ -1534,38 +1636,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic
VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL));
}
+static void d3d12_command_allocator_reset_descriptor_pool_array(struct d3d12_command_allocator *allocator,
+ enum vkd3d_shader_descriptor_type type)
+{
+ struct vkd3d_vk_descriptor_pool_array *array = &allocator->descriptor_pools[type];
+ struct d3d12_device *device = allocator->device;
+ const struct vkd3d_vk_device_procs *vk_procs;
+ const struct vkd3d_vk_descriptor_pool *pool;
+ size_t i;
+
+ vk_procs = &device->vk_procs;
+ for (i = 0; i < array->count; ++i)
+ {
+ pool = &array->pools[i];
+ if (pool->descriptor_count < allocator->vk_pool_sizes[type]
+ || !vkd3d_vk_descriptor_pool_array_push_array(&allocator->free_descriptor_pools[type], pool, 1))
+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, pool->vk_pool, NULL));
+ else
+ VK_CALL(vkResetDescriptorPool(device->vk_device, pool->vk_pool, 0));
+ }
+ array->count = 0;
+}
+
static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator,
bool keep_reusable_resources)
{
struct d3d12_device *device = allocator->device;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
- unsigned int i, j;
+ unsigned int i;
- allocator->vk_descriptor_pool = VK_NULL_HANDLE;
+ memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools));
if (keep_reusable_resources)
{
- if (vkd3d_array_reserve((void **)&allocator->free_descriptor_pools,
- &allocator->free_descriptor_pools_size,
- allocator->free_descriptor_pool_count + allocator->descriptor_pool_count,
- sizeof(*allocator->free_descriptor_pools)))
+ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i)
{
- for (i = 0, j = allocator->free_descriptor_pool_count; i < allocator->descriptor_pool_count; ++i, ++j)
- {
- VK_CALL(vkResetDescriptorPool(device->vk_device, allocator->descriptor_pools[i], 0));
- allocator->free_descriptor_pools[j] = allocator->descriptor_pools[i];
- }
- allocator->free_descriptor_pool_count += allocator->descriptor_pool_count;
- allocator->descriptor_pool_count = 0;
+ d3d12_command_allocator_reset_descriptor_pool_array(allocator, i);
}
}
else
{
- for (i = 0; i < allocator->free_descriptor_pool_count; ++i)
+ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i)
{
- VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->free_descriptor_pools[i], NULL));
+ vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->free_descriptor_pools[i], device);
}
- allocator->free_descriptor_pool_count = 0;
}
for (i = 0; i < allocator->transfer_buffer_count; ++i)
@@ -1586,11 +1700,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato
}
allocator->view_count = 0;
- for (i = 0; i < allocator->descriptor_pool_count; ++i)
+ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i)
{
- VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->descriptor_pools[i], NULL));
+ vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->descriptor_pools[i], device);
}
- allocator->descriptor_pool_count = 0;
for (i = 0; i < allocator->framebuffer_count; ++i)
{
@@ -1647,6 +1760,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
{
struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface);
unsigned int refcount = vkd3d_atomic_decrement_u32(&allocator->refcount);
+ size_t i;
TRACE("%p decreasing refcount to %u.\n", allocator, refcount);
@@ -1664,8 +1778,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
vkd3d_free(allocator->transfer_buffers);
vkd3d_free(allocator->buffer_views);
vkd3d_free(allocator->views);
- vkd3d_free(allocator->descriptor_pools);
- vkd3d_free(allocator->free_descriptor_pools);
+ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i)
+ {
+ vkd3d_vk_descriptor_pool_array_cleanup(&allocator->descriptor_pools[i]);
+ vkd3d_vk_descriptor_pool_array_cleanup(&allocator->free_descriptor_pools[i]);
+ }
vkd3d_free(allocator->framebuffers);
vkd3d_free(allocator->passes);
@@ -1822,6 +1939,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
struct vkd3d_queue *queue;
VkResult vr;
HRESULT hr;
+ size_t i;
if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store)))
return hr;
@@ -1851,11 +1969,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
return hresult_from_vk_result(vr);
}
- allocator->vk_descriptor_pool = VK_NULL_HANDLE;
+ memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools));
- allocator->free_descriptor_pools = NULL;
- allocator->free_descriptor_pools_size = 0;
- allocator->free_descriptor_pool_count = 0;
+ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i)
+ {
+ vkd3d_vk_descriptor_pool_array_init(&allocator->free_descriptor_pools[i]);
+ }
allocator->passes = NULL;
allocator->passes_size = 0;
@@ -1865,9 +1984,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
allocator->framebuffers_size = 0;
allocator->framebuffer_count = 0;
- allocator->descriptor_pools = NULL;
- allocator->descriptor_pools_size = 0;
- allocator->descriptor_pool_count = 0;
+ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i)
+ {
+ vkd3d_vk_descriptor_pool_array_init(&allocator->descriptor_pools[i]);
+ allocator->vk_pool_sizes[i] = min(VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE, device->vk_pool_limits[i]);
+ }
allocator->views = NULL;
allocator->views_size = 0;
@@ -2749,7 +2870,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li
}
vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator,
- layout->vk_layout, variable_binding_size, unbounded_offset != UINT_MAX);
+ layout->descriptor_type, layout->descriptor_count + variable_binding_size, layout->vk_layout,
+ variable_binding_size, unbounded_offset != UINT_MAX);
bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set;
}
@@ -2805,15 +2927,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des
break;
}
- if (range->descriptor_count == UINT_MAX)
- {
- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1];
- vk_descriptor_write->dstBinding = 0;
- }
- else
- {
- vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count;
- }
+ vk_descriptor_write->dstSet = vk_descriptor_sets[range->image_set];
+ vk_descriptor_write->dstBinding = use_array ? range->image_binding : range->image_binding + index;
vk_image_info->sampler = VK_NULL_HANDLE;
vk_image_info->imageView = u.view->v.u.vk_image_view;
@@ -2934,10 +3049,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list
}
static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write,
- const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set,
+ const struct d3d12_root_parameter *root_parameter, const VkDescriptorSet *vk_descriptor_sets,
VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info)
{
const struct d3d12_root_descriptor *root_descriptor;
+ VkDescriptorSet vk_descriptor_set;
switch (root_parameter->parameter_type)
{
@@ -2956,6 +3072,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v
}
root_descriptor = &root_parameter->u.descriptor;
+ vk_descriptor_set = vk_descriptor_sets ? vk_descriptor_sets[root_descriptor->set] : VK_NULL_HANDLE;
vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
vk_descriptor_write->pNext = NULL;
@@ -3011,7 +3128,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list
}
if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count],
- root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info))
+ root_parameter, bindings->descriptor_sets, vk_buffer_view, vk_buffer_info))
continue;
++descriptor_count;
@@ -3039,8 +3156,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma
uav_counter_count = state->uav_counters.binding_count;
if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes))))
return;
- if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(
- list->allocator, state->uav_counters.vk_set_layout, 0, false)))
+ if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator,
+ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_counter_count, state->uav_counters.vk_set_layout, 0, false)))
goto done;
for (i = 0; i < uav_counter_count; ++i)
@@ -4612,8 +4729,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list,
if (vk_info->KHR_push_descriptor)
{
- vk_write_descriptor_set_from_root_descriptor(&descriptor_write,
- root_parameter, VK_NULL_HANDLE, NULL, &buffer_info);
+ vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, NULL, &buffer_info);
VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point,
root_signature->vk_pipeline_layout, 0, 1, &descriptor_write));
}
@@ -4621,7 +4737,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list,
{
d3d12_command_list_prepare_descriptors(list, bind_point);
vk_write_descriptor_set_from_root_descriptor(&descriptor_write,
- root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info);
+ root_parameter, bindings->descriptor_sets, NULL, &buffer_info);
VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL));
VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors));
@@ -4685,8 +4801,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li
if (vk_info->KHR_push_descriptor)
{
- vk_write_descriptor_set_from_root_descriptor(&descriptor_write,
- root_parameter, VK_NULL_HANDLE, &vk_buffer_view, NULL);
+ vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, &vk_buffer_view, NULL);
VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point,
root_signature->vk_pipeline_layout, 0, 1, &descriptor_write));
}
@@ -4694,7 +4809,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li
{
d3d12_command_list_prepare_descriptors(list, bind_point);
vk_write_descriptor_set_from_root_descriptor(&descriptor_write,
- root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL);
+ root_parameter, bindings->descriptor_sets, &vk_buffer_view, NULL);
VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL));
VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors));
@@ -5371,8 +5486,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list,
view->info.texture.vk_view_type, view->format->type, &pipeline);
}
- if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(
- list->allocator, pipeline.vk_set_layout, 0, false)))
+ if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(list->allocator,
+ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, 1, pipeline.vk_set_layout, 0, false)))
{
ERR("Failed to allocate descriptor set.\n");
return;
@@ -6345,6 +6460,7 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op)
break;
case VKD3D_CS_OP_SIGNAL:
+ case VKD3D_CS_OP_SIGNAL_ON_CPU:
d3d12_fence_decref(op->u.signal.fence);
break;
@@ -7335,6 +7451,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *
struct vkd3d_cs_op_data *op;
struct d3d12_fence *fence;
unsigned int i;
+ HRESULT hr;
queue->is_flushing = true;
@@ -7368,6 +7485,11 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *
d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value);
break;
+ case VKD3D_CS_OP_SIGNAL_ON_CPU:
+ if (FAILED(hr = d3d12_fence_Signal(&op->u.signal.fence->ID3D12Fence1_iface, op->u.signal.value)))
+ ERR("Failed to signal fence %p, hr %s.\n", op->u.signal.fence, debugstr_hresult(hr));
+ break;
+
case VKD3D_CS_OP_EXECUTE:
d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count);
break;
@@ -7510,6 +7632,36 @@ void vkd3d_release_vk_queue(ID3D12CommandQueue *queue)
return vkd3d_queue_release(d3d12_queue->vkd3d_queue);
}
+HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, uint64_t value)
+{
+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+ struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface);
+ struct vkd3d_cs_op_data *op;
+ HRESULT hr = S_OK;
+
+ TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
+
+ vkd3d_mutex_lock(&command_queue->op_mutex);
+
+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue)))
+ {
+ ERR("Failed to add op.\n");
+ hr = E_OUTOFMEMORY;
+ goto done;
+ }
+ op->opcode = VKD3D_CS_OP_SIGNAL_ON_CPU;
+ op->u.signal.fence = fence;
+ op->u.signal.value = value;
+
+ d3d12_fence_incref(fence);
+
+ d3d12_command_queue_submit_locked(command_queue);
+
+done:
+ vkd3d_mutex_unlock(&command_queue->op_mutex);
+ return hr;
+}
+
/* ID3D12CommandSignature */
static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface)
{
diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c
index e92373a36fa..b51e2963efa 100644
--- a/libs/vkd3d/libs/vkd3d/device.c
+++ b/libs/vkd3d/libs/vkd3d/device.c
@@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des
uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2;
}
- limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers,
- properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision);
- limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages,
- properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision);
- limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers,
- properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision);
- limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages,
- properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision);
- limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers,
- properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision);
+ limits->uniform_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers,
+ properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision),
+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS);
+ limits->sampled_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSampledImages,
+ properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision),
+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS);
+ limits->storage_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers,
+ properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision),
+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS);
+ limits->storage_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageImages,
+ properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision),
+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS);
+ limits->sampler_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSamplers,
+ properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision),
+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS);
limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS);
}
@@ -2677,39 +2682,16 @@ static void vkd3d_time_domains_init(struct d3d12_device *device)
static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
{
const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits;
- VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes;
+ unsigned int *pool_sizes = device->vk_pool_limits;
- if (device->use_vk_heaps)
- {
- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
- pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors,
- VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE);
- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
- pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount;
- pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER;
- pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS);
- device->vk_pool_count = 3;
- return;
- }
-
- VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6);
- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors,
+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_CBV] = min(limits->uniform_buffer_max_descriptors,
VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE);
- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
- pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors,
+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SRV] = min(limits->sampled_image_max_descriptors,
VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE);
- pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
- pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount;
- pool_sizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
- pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors,
+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_UAV] = min(limits->storage_image_max_descriptors,
VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE);
- pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
- pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount;
- pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER;
- pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors,
+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER] = min(limits->sampler_max_descriptors,
VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE);
- device->vk_pool_count = 6;
};
static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size)
@@ -3461,6 +3443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9
for (i = 0; i < data->NumFeatureLevels; ++i)
{
D3D_FEATURE_LEVEL fl = data->pFeatureLevelsRequested[i];
+ TRACE("Requested feature level %#x.\n", fl);
if (data->MaxSupportedFeatureLevel < fl && fl <= vulkan_info->max_feature_level)
data->MaxSupportedFeatureLevel = fl;
}
@@ -3574,12 +3557,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9
return E_INVALIDARG;
}
- if (data->Format == DXGI_FORMAT_UNKNOWN)
- {
- data->PlaneCount = 1;
- return S_OK;
- }
-
if (!(format = vkd3d_get_format(device, data->Format, false)))
format = vkd3d_get_format(device, data->Format, true);
if (!format)
@@ -4385,7 +4362,7 @@ static void d3d12_device_get_resource1_allocation_info(struct d3d12_device *devi
{
desc = &resource_descs[i];
- if (FAILED(d3d12_resource_validate_desc(desc, device)))
+ if (FAILED(d3d12_resource_validate_desc(desc, device, 0)))
{
WARN("Invalid resource desc.\n");
goto invalid;
@@ -4716,10 +4693,11 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device,
uint64_t base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts,
UINT64 *row_sizes, UINT64 *total_bytes)
{
- unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch;
+ unsigned int i, sub_resource_idx, plane_idx, miplevel_idx, row_count, row_size, row_pitch;
unsigned int width, height, depth, plane_count, sub_resources_per_plane;
const struct vkd3d_format *format;
uint64_t offset, size, total;
+ DXGI_FORMAT plane_format;
if (layouts)
memset(layouts, 0xff, sizeof(*layouts) * sub_resource_count);
@@ -4730,20 +4708,19 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device,
if (total_bytes)
*total_bytes = ~(uint64_t)0;
- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0)))
+ if (!(format = vkd3d_get_format(device, desc->Format, true)))
{
WARN("Invalid format %#x.\n", desc->Format);
return;
}
- if (FAILED(d3d12_resource_validate_desc(desc, device)))
+ if (FAILED(d3d12_resource_validate_desc(desc, device, VKD3D_VALIDATE_FORCE_ALLOW_DS)))
{
WARN("Invalid resource desc.\n");
return;
}
- plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
- && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 2 : 1;
+ plane_count = format->plane_count;
sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc);
if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count))
@@ -4754,21 +4731,31 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device,
offset = 0;
total = 0;
+ plane_format = desc->Format;
for (i = 0; i < sub_resource_count; ++i)
{
sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane;
+ plane_idx = (first_sub_resource + i) / sub_resources_per_plane;
miplevel_idx = sub_resource_idx % desc->MipLevels;
+
+ if (plane_count > 1)
+ {
+ plane_format = !plane_idx ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R8_TYPELESS;
+ format = vkd3d_get_format(device, plane_format, true);
+ }
+
width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width);
height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height);
depth = d3d12_resource_desc_get_depth(desc, miplevel_idx);
row_count = height / format->block_height;
row_size = (width / format->block_width) * format->byte_count * format->block_byte_count;
- row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+ /* Direct3D 12 requires double the usual alignment for dual-planar formats. */
+ row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count);
if (layouts)
{
layouts[i].Offset = base_offset + offset;
- layouts[i].Footprint.Format = desc->Format;
+ layouts[i].Footprint.Format = plane_format;
layouts[i].Footprint.Width = width;
layouts[i].Footprint.Height = height;
layouts[i].Footprint.Depth = depth;
@@ -4780,7 +4767,7 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device,
row_sizes[i] = row_size;
size = max(0, row_count - 1) * row_pitch + row_size;
- size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + size;
+ size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count) + size;
total = offset + size;
offset = align(total, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c
index 1f7d90eb95f..eab97715944 100644
--- a/libs/vkd3d/libs/vkd3d/resource.c
+++ b/libs/vkd3d/libs/vkd3d/resource.c
@@ -951,7 +951,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device,
HRESULT hr;
VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER);
- VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK);
+ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device, 0) == S_OK);
if (!desc->MipLevels)
{
@@ -1847,7 +1847,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1
return true;
}
-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device)
+HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags)
{
const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion;
const struct vkd3d_format *format;
@@ -1893,7 +1893,8 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3
return E_INVALIDARG;
}
- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0)))
+ if (!(format = vkd3d_get_format(device, desc->Format,
+ desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL || flags & VKD3D_VALIDATE_FORCE_ALLOW_DS)))
{
WARN("Invalid format %#x.\n", desc->Format);
return E_INVALIDARG;
@@ -2013,7 +2014,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
resource->gpu_address = 0;
resource->flags = 0;
- if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device)))
+ if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device, 0)))
return hr;
resource->format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0);
diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
index 2b0f81d3812..bd3c3758ecb 100644
--- a/libs/vkd3d/libs/vkd3d/state.c
+++ b/libs/vkd3d/libs/vkd3d/state.c
@@ -265,25 +265,6 @@ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHA
}
}
-static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type,
- bool is_buffer)
-{
- switch (type)
- {
- case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV:
- return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
- case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV:
- return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
- case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV:
- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER:
- return VK_DESCRIPTOR_TYPE_SAMPLER;
- default:
- FIXME("Unhandled descriptor range type type %#x.\n", type);
- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
- }
-}
-
static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type(
D3D12_DESCRIPTOR_RANGE_TYPE type)
{
@@ -597,8 +578,9 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
goto done;
}
- qsort(info->ranges, info->range_count, sizeof(*info->ranges),
- d3d12_root_signature_info_range_compare);
+ if (info->ranges)
+ qsort(info->ranges, info->range_count, sizeof(*info->ranges),
+ d3d12_root_signature_info_range_compare);
for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i)
{
@@ -717,6 +699,8 @@ struct vk_binding_array
VkDescriptorSetLayoutBinding *bindings;
size_t capacity, count;
+ enum vkd3d_shader_descriptor_type descriptor_type;
+ unsigned int descriptor_set;
unsigned int table_index;
unsigned int unbounded_offset;
VkDescriptorSetLayoutCreateFlags flags;
@@ -754,14 +738,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array,
return true;
}
+static void vk_binding_array_make_unbound(struct vk_binding_array *array,
+ unsigned int offset, unsigned int table_index)
+{
+ array->unbounded_offset = offset;
+ array->table_index = table_index;
+}
+
struct vkd3d_descriptor_set_context
{
struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS];
+ struct vk_binding_array *current_binding_array[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
unsigned int table_index;
- unsigned int unbounded_offset;
unsigned int descriptor_index;
unsigned int uav_counter_index;
unsigned int push_constant_index;
+
+ struct vk_binding_array *push_descriptor_set;
+ bool push_descriptor;
};
static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context)
@@ -786,46 +780,66 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns
return true;
}
-static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array(
- struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context)
+static struct vk_binding_array *d3d12_root_signature_append_vk_binding_array(
+ struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type,
+ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context)
{
+ struct vk_binding_array *array;
+ unsigned int set;
+
if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings))
return NULL;
- return &context->vk_bindings[root_signature->vk_set_count];
+ set = root_signature->vk_set_count++;
+ array = &context->vk_bindings[set];
+ array->descriptor_type = descriptor_type;
+ array->descriptor_set = set;
+ array->unbounded_offset = UINT_MAX;
+ array->flags = flags;
+
+ return array;
}
-static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature,
- VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context)
+static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type(
+ struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type,
+ struct vkd3d_descriptor_set_context *context)
{
- struct vk_binding_array *array;
+ struct vk_binding_array *array, **current;
- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count)
- return;
+ if (context->push_descriptor)
+ {
+ if (!context->push_descriptor_set)
+ context->push_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature,
+ descriptor_type, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context);
- array->table_index = context->table_index;
- array->unbounded_offset = context->unbounded_offset;
- array->flags = flags;
+ return context->push_descriptor_set;
+ }
- ++root_signature->vk_set_count;
+ current = context->current_binding_array;
+ if (!(array = current[descriptor_type]))
+ {
+ array = d3d12_root_signature_append_vk_binding_array(root_signature, descriptor_type, 0, context);
+ current[descriptor_type] = array;
+ }
+
+ return array;
}
static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature,
- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space,
- unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility,
- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context,
- const VkSampler *immutable_sampler, unsigned int *binding_idx)
+ struct vk_binding_array *array, enum vkd3d_shader_descriptor_type descriptor_type,
+ unsigned int register_space, unsigned int register_idx, bool buffer_descriptor,
+ enum vkd3d_shader_visibility shader_visibility, unsigned int descriptor_count,
+ struct vkd3d_descriptor_set_context *context, const VkSampler *immutable_sampler)
{
struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets
? &root_signature->descriptor_offsets[context->descriptor_index] : NULL;
struct vkd3d_shader_resource_binding *mapping;
- struct vk_binding_array *array;
+ VkDescriptorType vk_descriptor_type;
unsigned int idx;
- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context))
- || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count],
- vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count,
- stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx)))
+ vk_descriptor_type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor);
+ if (!vk_binding_array_add_binding(array, vk_descriptor_type, descriptor_count,
+ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))
return E_OUTOFMEMORY;
mapping = &root_signature->descriptor_mapping[context->descriptor_index++];
@@ -834,7 +848,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur
mapping->register_index = register_idx;
mapping->shader_visibility = shader_visibility;
mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE;
- mapping->binding.set = root_signature->vk_set_count;
+ mapping->binding.set = array->descriptor_set;
mapping->binding.binding = idx;
mapping->binding.count = descriptor_count;
if (offset)
@@ -843,12 +857,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur
offset->dynamic_offset_index = ~0u;
}
- if (context->unbounded_offset != UINT_MAX)
- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context);
-
- if (binding_idx)
- *binding_idx = idx;
-
return S_OK;
}
@@ -911,7 +919,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro
}
static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature,
- const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility,
+ struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility,
unsigned int vk_binding_array_count, unsigned int bindings_per_range,
struct vkd3d_descriptor_set_context *context)
{
@@ -919,34 +927,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r
bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER;
enum vkd3d_shader_descriptor_type descriptor_type = range->type;
unsigned int i, register_space = range->register_space;
+ struct vk_binding_array *array;
HRESULT hr;
- if (range->descriptor_count == UINT_MAX)
- context->unbounded_offset = range->offset;
+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context)))
+ return E_OUTOFMEMORY;
+ range->set = array->descriptor_set - root_signature->main_set;
+ range->binding = array->count;
for (i = 0; i < bindings_per_range; ++i)
{
- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type,
+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type,
register_space, range->base_register_idx + i, is_buffer, shader_visibility,
- vk_binding_array_count, context, NULL, NULL)))
+ vk_binding_array_count, context, NULL)))
return hr;
}
- if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
+ if (range->descriptor_count == UINT_MAX)
{
- context->unbounded_offset = UINT_MAX;
- return S_OK;
+ vk_binding_array_make_unbound(array, range->offset, context->table_index);
+ context->current_binding_array[descriptor_type] = NULL;
}
+ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
+ return S_OK;
+
+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context)))
+ return E_OUTOFMEMORY;
+
+ range->image_set = array->descriptor_set - root_signature->main_set;
+ range->image_binding = array->count;
for (i = 0; i < bindings_per_range; ++i)
{
- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type,
+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type,
register_space, range->base_register_idx + i, false, shader_visibility,
- vk_binding_array_count, context, NULL, NULL)))
+ vk_binding_array_count, context, NULL)))
return hr;
}
- context->unbounded_offset = UINT_MAX;
+ if (range->descriptor_count == UINT_MAX)
+ {
+ vk_binding_array_make_unbound(array, range->offset, context->table_index);
+ context->current_binding_array[descriptor_type] = NULL;
+ }
return S_OK;
}
@@ -1199,16 +1222,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
if (use_vk_heaps)
{
- /* set, binding and vk_binding_count are not used. */
+ /* set, binding, image_set, image_binding, and vk_binding_count are not used. */
range->set = 0;
range->binding = 0;
+ range->image_set = 0;
+ range->image_binding = 0;
range->vk_binding_count = 0;
d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context);
continue;
}
- range->set = root_signature->vk_set_count - root_signature->main_set;
-
if (root_signature->use_descriptor_arrays)
{
if (j && range->type != table->ranges[j - 1].type)
@@ -1229,6 +1252,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
range->set = base_range->set;
range->binding = base_range->binding;
+ range->image_set = base_range->image_set;
+ range->image_binding = base_range->image_binding;
range->vk_binding_count = base_range->vk_binding_count - rel_offset;
d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range,
rel_offset, shader_visibility, context);
@@ -1251,8 +1276,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
bindings_per_range = range->descriptor_count;
}
- range->binding = context->vk_bindings[root_signature->vk_set_count].count;
-
if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range,
p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context)))
return hr;
@@ -1266,7 +1289,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature,
const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context)
{
- unsigned int binding, i;
+ enum vkd3d_shader_descriptor_type descriptor_type;
+ struct vk_binding_array *array;
+ unsigned int i;
HRESULT hr;
root_signature->push_descriptor_mask = 0;
@@ -1281,14 +1306,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
root_signature->push_descriptor_mask |= 1u << i;
- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature,
- vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType),
+ descriptor_type = vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType);
+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context)))
+ return E_OUTOFMEMORY;
+
+ root_signature->parameters[i].parameter_type = p->ParameterType;
+ root_signature->parameters[i].u.descriptor.set = array->descriptor_set;
+ root_signature->parameters[i].u.descriptor.binding = array->count;
+
+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type,
p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true,
- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding)))
+ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL)))
return hr;
- root_signature->parameters[i].parameter_type = p->ParameterType;
- root_signature->parameters[i].u.descriptor.binding = binding;
}
return S_OK;
@@ -1298,10 +1328,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa
struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc,
struct vkd3d_descriptor_set_context *context)
{
+ struct vk_binding_array *array;
unsigned int i;
HRESULT hr;
VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers);
+
+ if (!desc->NumStaticSamplers)
+ return S_OK;
+
+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature,
+ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, context)))
+ return E_OUTOFMEMORY;
+
for (i = 0; i < desc->NumStaticSamplers; ++i)
{
const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i];
@@ -1309,16 +1348,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa
if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i])))
return hr;
- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature,
+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array,
VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false,
vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context,
- &root_signature->static_samplers[i], NULL)))
+ &root_signature->static_samplers[i])))
return hr;
}
- if (device->use_vk_heaps)
- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context);
-
return S_OK;
}
@@ -1450,29 +1486,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device,
return S_OK;
}
+static HRESULT d3d12_descriptor_set_layout_init(struct d3d12_descriptor_set_layout *layout,
+ struct d3d12_device *device, const struct vk_binding_array *array)
+{
+ unsigned int descriptor_count;
+ bool unbounded;
+ HRESULT hr;
+ size_t i;
+
+ descriptor_count = array->unbounded_offset;
+ if (!(unbounded = descriptor_count != UINT_MAX))
+ {
+ for (i = 0, descriptor_count = 0; i < array->count; ++i)
+ {
+ descriptor_count += array->bindings[i].descriptorCount;
+ }
+ }
+
+ if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, array->flags,
+ array->count, unbounded, array->bindings, &layout->vk_layout)))
+ return hr;
+ layout->descriptor_type = array->descriptor_type;
+ layout->descriptor_count = descriptor_count;
+ layout->unbounded_offset = array->unbounded_offset;
+ layout->table_index = array->table_index;
+
+ return S_OK;
+}
+
static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature,
struct vkd3d_descriptor_set_context *context)
{
unsigned int i;
HRESULT hr;
- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context);
-
if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count))
return E_INVALIDARG;
for (i = 0; i < root_signature->vk_set_count; ++i)
{
- struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i];
- struct vk_binding_array *array = &context->vk_bindings[i];
+ const struct vk_binding_array *array = &context->vk_bindings[i];
VKD3D_ASSERT(array->count);
- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count,
- array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout)))
+ if (FAILED(hr = d3d12_descriptor_set_layout_init(&root_signature->descriptor_set_layouts[i],
+ root_signature->device, array)))
return hr;
- layout->unbounded_offset = array->unbounded_offset;
- layout->table_index = array->table_index;
}
return S_OK;
@@ -1518,7 +1577,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
HRESULT hr;
memset(&context, 0, sizeof(context));
- context.unbounded_offset = UINT_MAX;
root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl;
root_signature->refcount = 1;
@@ -1580,17 +1638,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
sizeof(*root_signature->static_samplers))))
goto fail;
+ context.push_descriptor = vk_info->KHR_push_descriptor;
if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context)))
goto fail;
-
- /* We use KHR_push_descriptor for root descriptor parameters. */
- if (vk_info->KHR_push_descriptor)
- {
- d3d12_root_signature_append_vk_binding_array(root_signature,
- VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context);
- }
-
- root_signature->main_set = root_signature->vk_set_count;
+ root_signature->main_set = !!context.push_descriptor_set;
+ context.push_descriptor = false;
if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc,
root_signature->push_constant_ranges, &root_signature->push_constant_range_count)))
diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c
index 839bb173854..c2832a61f67 100644
--- a/libs/vkd3d/libs/vkd3d/utils.c
+++ b/libs/vkd3d/libs/vkd3d/utils.c
@@ -29,7 +29,7 @@
#define UINT VKD3D_FORMAT_TYPE_UINT
static const struct vkd3d_format vkd3d_formats[] =
{
- {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1},
+ {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1, 0, 1},
{DXGI_FORMAT_R32G32B32A32_TYPELESS, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1, TYPELESS},
{DXGI_FORMAT_R32G32B32A32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1},
{DXGI_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32B32A32_UINT, 16, 1, 1, 1, COLOR, 1, UINT},
diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
index 97a99782d6a..fd1fbb1679a 100644
--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
@@ -58,12 +58,19 @@
#define VKD3D_MAX_VK_SYNC_OBJECTS 4u
#define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u
#define VKD3D_MAX_DESCRIPTOR_SETS 64u
+/* Direct3D 12 binding tier 3 has a limit of "1,000,000+" CBVs, SRVs and UAVs.
+ * The "+" presumably means that implementations may support even more; the
+ * table only specifies guaranteed minimums, so one million is used as the
+ * cap here. */
+#define VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS 1000000u
/* D3D12 binding tier 3 has a limit of 2048 samplers. */
#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
-/* The main limitation here is the simple descriptor pool recycling scheme
- * requiring each pool to contain all descriptor types used by vkd3d. Limit
- * this number to prevent excessive pool memory use. */
#define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u)
+#define VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE 1024u
+
+#define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1)
+
+#define VKD3D_VALIDATE_FORCE_ALLOW_DS 0x1u
extern uint64_t object_global_serial_id;
@@ -235,8 +242,6 @@ struct vkd3d_fence_worker
struct vkd3d_waiting_fence *fences;
size_t fences_size;
- void (*wait_for_gpu_fence)(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *enqueued_fence);
-
struct vkd3d_queue *queue;
struct d3d12_device *device;
};
@@ -529,7 +534,7 @@ struct vkd3d_resource_allocation_info
};
bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource);
-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device);
+HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags);
void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource,
UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape,
UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling,
@@ -770,6 +775,25 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE
void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device,
struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc);
+static inline VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type,
+ bool is_buffer)
+{
+ switch (type)
+ {
+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV:
+ return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV:
+ return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV:
+ return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER:
+ return VK_DESCRIPTOR_TYPE_SAMPLER;
+ default:
+ FIXME("Unhandled descriptor range type type %#x.\n", type);
+ return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ }
+}
+
enum vkd3d_vk_descriptor_set_index
{
VKD3D_SET_INDEX_SAMPLER,
@@ -899,6 +923,8 @@ struct d3d12_root_descriptor_table_range
unsigned int vk_binding_count;
uint32_t set;
uint32_t binding;
+ uint32_t image_set;
+ uint32_t image_binding;
enum vkd3d_shader_descriptor_type type;
uint32_t descriptor_magic;
@@ -920,6 +946,7 @@ struct d3d12_root_constant
struct d3d12_root_descriptor
{
+ uint32_t set;
uint32_t binding;
};
@@ -936,7 +963,9 @@ struct d3d12_root_parameter
struct d3d12_descriptor_set_layout
{
+ enum vkd3d_shader_descriptor_type descriptor_type;
VkDescriptorSetLayout vk_layout;
+ unsigned int descriptor_count;
unsigned int unbounded_offset;
unsigned int table_index;
};
@@ -1135,6 +1164,18 @@ struct vkd3d_buffer
VkDeviceMemory vk_memory;
};
+struct vkd3d_vk_descriptor_pool
+{
+ unsigned int descriptor_count;
+ VkDescriptorPool vk_pool;
+};
+
+struct vkd3d_vk_descriptor_pool_array
+{
+ struct vkd3d_vk_descriptor_pool *pools;
+ size_t capacity, count;
+};
+
/* ID3D12CommandAllocator */
struct d3d12_command_allocator
{
@@ -1146,11 +1187,9 @@ struct d3d12_command_allocator
VkCommandPool vk_command_pool;
- VkDescriptorPool vk_descriptor_pool;
+ VkDescriptorPool vk_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
- VkDescriptorPool *free_descriptor_pools;
- size_t free_descriptor_pools_size;
- size_t free_descriptor_pool_count;
+ struct vkd3d_vk_descriptor_pool_array free_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
VkRenderPass *passes;
size_t passes_size;
@@ -1160,9 +1199,8 @@ struct d3d12_command_allocator
size_t framebuffers_size;
size_t framebuffer_count;
- VkDescriptorPool *descriptor_pools;
- size_t descriptor_pools_size;
- size_t descriptor_pool_count;
+ struct vkd3d_vk_descriptor_pool_array descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
+ unsigned int vk_pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
struct vkd3d_view **views;
size_t views_size;
@@ -1324,6 +1362,7 @@ enum vkd3d_cs_op
{
VKD3D_CS_OP_WAIT,
VKD3D_CS_OP_SIGNAL,
+ VKD3D_CS_OP_SIGNAL_ON_CPU,
VKD3D_CS_OP_EXECUTE,
VKD3D_CS_OP_UPDATE_MAPPINGS,
VKD3D_CS_OP_COPY_MAPPINGS,
@@ -1516,8 +1555,6 @@ struct vkd3d_desc_object_cache
size_t size;
};
-#define VKD3D_DESCRIPTOR_POOL_COUNT 6
-
/* ID3D12Device */
struct d3d12_device
{
@@ -1536,8 +1573,7 @@ struct d3d12_device
struct vkd3d_desc_object_cache view_desc_cache;
struct vkd3d_desc_object_cache cbuffer_desc_cache;
- VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
- unsigned int vk_pool_count;
+ unsigned int vk_pool_limits[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT];
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
bool use_vk_heaps;
--
2.45.2