diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch
deleted file mode 100644
index 51e6c899..00000000
--- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch
+++ /dev/null
@@ -1,29283 +0,0 @@
-From abcbb54af650bd1699f695bdbbffcbffe6ef84fe Mon Sep 17 00:00:00 2001
-From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
-Date: Tue, 3 Sep 2024 07:18:49 +1000
-Subject: [PATCH] Updated vkd3d to e383834049825dde8feb0a230c39d499e580cdf1.
- libs/vkd3d/Makefile.in                        |    1 +
- libs/vkd3d/include/private/vkd3d_common.h     |    4 +-
- libs/vkd3d/include/vkd3d.h                    |    1 +
- libs/vkd3d/include/vkd3d_shader.h             |  219 +-
- libs/vkd3d/libs/vkd3d-common/blob.c           |    1 +
- libs/vkd3d/libs/vkd3d-shader/checksum.c       |   49 +-
- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c        |  107 +-
- libs/vkd3d/libs/vkd3d-shader/d3dbc.c          | 1262 ++---
- libs/vkd3d/libs/vkd3d-shader/dxbc.c           |   21 +-
- libs/vkd3d/libs/vkd3d-shader/dxil.c           |  167 +-
- libs/vkd3d/libs/vkd3d-shader/fx.c             | 2016 ++++++--
- libs/vkd3d/libs/vkd3d-shader/glsl.c           | 2369 +++++++++-
- libs/vkd3d/libs/vkd3d-shader/hlsl.c           |  430 +-
- libs/vkd3d/libs/vkd3d-shader/hlsl.h           |  181 +-
- libs/vkd3d/libs/vkd3d-shader/hlsl.l           |   10 +-
- libs/vkd3d/libs/vkd3d-shader/hlsl.y           | 1348 ++++--
- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c   | 3775 +++++++++++++--
- .../libs/vkd3d-shader/hlsl_constant_ops.c     |   20 +-
- libs/vkd3d/libs/vkd3d-shader/ir.c             | 4159 ++++++++++++-----
- libs/vkd3d/libs/vkd3d-shader/msl.c            |  898 ++++
- libs/vkd3d/libs/vkd3d-shader/preproc.h        |    3 +-
- libs/vkd3d/libs/vkd3d-shader/preproc.l        |   56 +-
- libs/vkd3d/libs/vkd3d-shader/preproc.y        |   13 -
- libs/vkd3d/libs/vkd3d-shader/spirv.c          |  530 ++-
- libs/vkd3d/libs/vkd3d-shader/tpf.c            | 2647 +++++------
- .../libs/vkd3d-shader/vkd3d_shader_main.c     |  269 +-
- .../libs/vkd3d-shader/vkd3d_shader_private.h  |  134 +-
- libs/vkd3d/libs/vkd3d/command.c               |  123 +-
- libs/vkd3d/libs/vkd3d/device.c                |  175 +-
- libs/vkd3d/libs/vkd3d/resource.c              |   14 +-
- libs/vkd3d/libs/vkd3d/state.c                 |  410 +-
- libs/vkd3d/libs/vkd3d/utils.c                 |    3 +-
- libs/vkd3d/libs/vkd3d/vkd3d_main.c            |    1 +
- libs/vkd3d/libs/vkd3d/vkd3d_private.h         |   27 +-
- 34 files changed, 15965 insertions(+), 5478 deletions(-)
- create mode 100644 libs/vkd3d/libs/vkd3d-shader/msl.c
-diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in
-index 94e4833dc9a..b073790d986 100644
---- a/libs/vkd3d/Makefile.in
-+++ b/libs/vkd3d/Makefile.in
-@@ -25,6 +25,7 @@ SOURCES = \
- 	libs/vkd3d-shader/hlsl_codegen.c \
- 	libs/vkd3d-shader/hlsl_constant_ops.c \
- 	libs/vkd3d-shader/ir.c \
-+	libs/vkd3d-shader/msl.c \
- 	libs/vkd3d-shader/preproc.l \
- 	libs/vkd3d-shader/preproc.y \
- 	libs/vkd3d-shader/spirv.c \
-diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h
-index 39145a97df1..fd62730f948 100644
---- a/libs/vkd3d/include/private/vkd3d_common.h
-+++ b/libs/vkd3d/include/private/vkd3d_common.h
-@@ -62,6 +62,8 @@
- #define VKD3D_STRINGIFY(x) #x
-+#define vkd3d_clamp(value, lower, upper) max(min(value, upper), lower)
- #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9')
- #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C')
- #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L')
-@@ -273,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v)
- {
- #ifdef _MSC_VER
-     return __popcnt(v);
--#elif defined(__MINGW32__)
-+#elif defined(HAVE_BUILTIN_POPCOUNT)
-     return __builtin_popcount(v);
- #else
-     v -= (v >> 1) & 0x55555555;
-diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h
-index 398ae2442d6..b18fd14f4c3 100644
---- a/libs/vkd3d/include/vkd3d.h
-+++ b/libs/vkd3d/include/vkd3d.h
-@@ -98,6 +98,7 @@ enum vkd3d_api_version
-     VKD3D_API_VERSION_1_11,
-     VKD3D_API_VERSION_1_12,
-     VKD3D_API_VERSION_1_13,
-+    VKD3D_API_VERSION_1_14,
- };
-diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h
-index d9a355d3bc9..cb561d7f079 100644
---- a/libs/vkd3d/include/vkd3d_shader.h
-+++ b/libs/vkd3d/include/vkd3d_shader.h
-@@ -56,6 +56,7 @@ enum vkd3d_shader_api_version
- };
-@@ -111,6 +112,11 @@ enum vkd3d_shader_structure_type
-      * \since 1.13
-      */
-+    /**
-+     * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure.
-+     * \since 1.15
-+     */
- };
-@@ -190,6 +196,17 @@ enum vkd3d_shader_compile_option_backward_compatibility
-      *  - DEPTH to SV_Depth for pixel shader outputs.
-      */
-+    /**
-+     *  Causes 'double' to behave as an alias for 'float'. This option only
-+     *  applies to HLSL sources with shader model 1-3 target profiles. Without
-+     *  this option using the 'double' type produces compilation errors in
-+     *  these target profiles.
-+     *
-+     *  This option is disabled by default.
-+     *
-+     *  \since 1.14
-+     */
- };
-@@ -469,8 +486,8 @@ enum vkd3d_shader_parameter_type
-     /** The parameter value is embedded directly in the shader. */
-     /**
--     * The parameter value is provided to the shader via a specialization
--     * constant. This value is only supported for the SPIR-V target type.
-+     * The parameter value is provided to the shader via specialization
-+     * constants. This value is only supported for the SPIR-V target type.
-      */
-     /**
-@@ -495,6 +512,13 @@ enum vkd3d_shader_parameter_data_type
-     /** The parameter is provided as a 32-bit float. \since 1.13 */
-+    /**
-+     * The parameter is provided as a 4-dimensional vector of 32-bit floats.
-+     * This parameter must be used with struct vkd3d_shader_parameter1;
-+     * it cannot be used with struct vkd3d_shader_parameter.
-+     * \since 1.14
-+     */
- };
-@@ -578,6 +602,143 @@ enum vkd3d_shader_parameter_name
-      * \since 1.13
-      */
-+    /**
-+     * A mask of enabled clip planes.
-+     *
-+     * When this parameter is provided to a vertex shader, for each nonzero bit
-+     * of this mask, a user clip distance will be generated from vertex position
-+     * in clip space, and the clip plane defined by the indexed vector, taken
-+     * from the VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_# parameter.
-+     *
-+     * Regardless of the specific clip planes which are enabled, the clip
-+     * distances which are output are a contiguous array starting from clip
-+     * distance 0. This affects the interface of OpenGL. For example, if only
-+     * clip planes 1 and 3 are enabled (and so the value of the mask is 0xa),
-+     * the user should enable only GL_CLIP_DISTANCE0 and GL_CLIP_DISTANCE1.
-+     *
-+     * The default value is zero, i.e. do not enable any clip planes.
-+     *
-+     * The data type for this parameter must be
-+     *
-+     * version of vkd3d-shader.
-+     *
-+     * If the source shader writes clip distances and this parameter is nonzero,
-+     * compilation fails.
-+     *
-+     * \since 1.14
-+     */
-+    /**
-+     * Clip plane values.
-+     * See VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK for documentation of
-+     * clip planes.
-+     *
-+     * These enum values are contiguous and arithmetic may safely be performed
-+     * on them. That is, VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_[n] is
-+     *
-+     * The data type for each parameter must be
-+     *
-+     * The default value for each plane is a (0, 0, 0, 0) vector.
-+     *
-+     * \since 1.14
-+     */
-+    /**
-+     * Point size.
-+     *
-+     * When this parameter is provided to a vertex, tessellation, or geometry
-+     * shader, and the source shader does not write point size, it specifies a
-+     * uniform value which will be written to point size.
-+     * If the source shader writes point size, this parameter is ignored.
-+     *
-+     * This parameter can be used to implement fixed function point size, as
-+     * present in Direct3D versions 8 and 9, if the target environment does not
-+     * support point size as part of its own fixed-function API (as Vulkan and
-+     * core OpenGL).
-+     *
-+     * The data type for this parameter must be
-+     *
-+     * \since 1.14
-+     */
-+    /**
-+     * Minimum point size.
-+     *
-+     * When this parameter is provided to a vertex, tessellation, or geometry
-+     * shader, and the source shader writes point size or uses the
-+     * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE parameter, the point size will
-+     * be clamped to the provided minimum value.
-+     * If point size is not written in one of these ways,
-+     * this parameter is ignored.
-+     * If this parameter is not provided, the point size will not be clamped
-+     * to a minimum size by vkd3d-shader.
-+     *
-+     * This parameter can be used to implement fixed function point size, as
-+     * present in Direct3D versions 8 and 9, if the target environment does not
-+     * support point size as part of its own fixed-function API (as Vulkan and
-+     * core OpenGL).
-+     *
-+     * The data type for this parameter must be
-+     *
-+     * \since 1.14
-+     */
-+    /**
-+     * Maximum point size.
-+     *
-+     * This parameter has identical behaviour to
-+     * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, except that it provides
-+     * the maximum size rather than the minimum.
-+     *
-+     * \since 1.14
-+     */
-+    /**
-+     * Whether texture coordinate inputs should take their values from the
-+     * point coordinate.
-+     *
-+     * When this parameter is provided to a pixel shader, and the value is
-+     * nonzero, any fragment shader input with the semantic name "TEXCOORD"
-+     * takes its value from the point coordinates instead of from the previous
-+     * shader. The point coordinates here are defined as a four-component vector
-+     * whose X and Y components are the X and Y coordinates of the fragment
-+     * within a point being rasterized, and whose Z and W components are zero.
-+     *
-+     * In GLSL, the X and Y components are drawn from gl_PointCoord; in SPIR-V,
-+     * they are drawn from a variable with the BuiltinPointCoord decoration.
-+     *
-+     * This includes t# fragment shader inputs in shader model 2 shaders,
-+     * as well as texture sampling in shader model 1 shaders.
-+     *
-+     * This parameter can be used to implement fixed function point sprite, as
-+     * present in Direct3D versions 8 and 9, if the target environment does not
-+     * support point sprite as part of its own fixed-function API (as Vulkan and
-+     * core OpenGL).
-+     *
-+     * The data type for this parameter must be
-+     *
-+     * The default value is zero, i.e. use the original varyings.
-+     *
-+     * version of vkd3d-shader.
-+     *
-+     * \since 1.14
-+     */
- };
-@@ -625,6 +786,13 @@ struct vkd3d_shader_parameter_immediate_constant1
-          */
-         float f32;
-+        /**
-+         * A pointer to the value if the parameter's data type is
-+         *
-+         * \since 1.14
-+         */
-+        float f32_vec4[4];
-         void *_pointer_pad;
-         uint32_t _pad[4];
-     } u;
-@@ -636,7 +804,13 @@ struct vkd3d_shader_parameter_immediate_constant1
-  */
- struct vkd3d_shader_parameter_specialization_constant
- {
--    /** The ID of the specialization constant. */
-+    /**
-+     * The ID of the specialization constant.
-+     * If the type comprises more than one constant, such as
-+     * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4, then a contiguous
-+     * array of specialization constants should be used, one for each component,
-+     * and this ID should point to the first component.
-+     */
-     uint32_t id;
- };
-@@ -1046,6 +1220,11 @@ enum vkd3d_shader_source_type
-      * the format used for Direct3D shader model 6 shaders. \since 1.9
-      */
-+    /**
-+     * Binary format used by Direct3D 9/10.x/11 effects.
-+     * Input is a raw FX section without container. \since 1.14
-+     */
- };
-@@ -1087,6 +1266,10 @@ enum vkd3d_shader_target_type
-      * Output is a raw FX section without container. \since 1.11
-      */
-+    /**
-+     * A 'Metal Shading Language' shader. \since 1.14
-+     */
- };
-@@ -1292,7 +1475,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local,
-  * vkd3d_shader_preprocess_info.
-  *
-  * \param code Contents of the included file, which were allocated by the
-- * \ref pfn_open_include callback. The user must free them.
-+ * vkd3d_shader_preprocess_info.pfn_open_include callback.
-+ * The user must free them.
-  *
-  * \param context The user-defined pointer passed to struct
-  * vkd3d_shader_preprocess_info.
-@@ -1319,8 +1503,8 @@ struct vkd3d_shader_preprocess_info
-     /**
-      * Pointer to an array of predefined macros. Each macro in this array will
--     * be expanded as if a corresponding #define statement were prepended to the
--     * source code.
-+     * be expanded as if a corresponding \#define statement were prepended to
-+     * the source code.
-      *
-      * If the same macro is specified multiple times, only the last value is
-      * used.
-@@ -1861,6 +2045,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info
-     unsigned int combined_sampler_count;
- };
-+ * A chained structure describing the tessellation information in a hull shader.
-+ *
-+ * This structure extends vkd3d_shader_compile_info.
-+ *
-+ * \since 1.15
-+ */
-+struct vkd3d_shader_scan_hull_shader_tessellation_info
-+    enum vkd3d_shader_structure_type type;
-+    /** Optional pointer to a structure containing further parameters. */
-+    const void *next;
-+    /** The tessellation output primitive. */
-+    enum vkd3d_shader_tessellator_output_primitive output_primitive;
-+    /** The tessellation partitioning mode. */
-+    enum vkd3d_shader_tessellator_partitioning partitioning;
- /**
-  * Data type of a shader varying, returned as part of struct
-  * vkd3d_shader_signature_element.
-@@ -2333,6 +2537,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported
-  *
-  * Supported transformations can also be detected at runtime with the functions
-  * vkd3d_shader_get_supported_source_types() and
-@@ -2798,7 +3003,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_
-  * \param input_signature The input signature of the second shader.
-  *
-  * \param count On output, contains the number of entries written into
-- * \ref varyings.
-+ * "varyings".
-  *
-  * \param varyings Pointer to an output array of varyings.
-  * This must point to space for N varyings, where N is the number of elements
-diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c
-index f60ef7db769..c2c6ad67804 100644
---- a/libs/vkd3d/libs/vkd3d-common/blob.c
-+++ b/libs/vkd3d/libs/vkd3d-common/blob.c
-@@ -20,6 +20,7 @@
- #endif
- #define COBJMACROS
- #define CONST_VTABLE
- #include "vkd3d.h"
- #include "vkd3d_blob.h"
-diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c
-index d9560628c77..45de1c92513 100644
---- a/libs/vkd3d/libs/vkd3d-shader/checksum.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c
-@@ -33,6 +33,11 @@
-  * will fill a supplied 16-byte array with the digest.
-  */
-+ * DXBC uses a variation of the MD5 algorithm, which only changes the way
-+ * the message is padded in the final step.
-+ */
- #include "vkd3d_shader_private.h"
-@@ -230,10 +235,9 @@ static void md5_update(struct md5_ctx *ctx, const unsigned char *buf, unsigned i
-     memcpy(ctx->in, buf, len);
- }
--static void dxbc_checksum_final(struct md5_ctx *ctx)
-+static void md5_final(struct md5_ctx *ctx, enum vkd3d_md5_variant variant)
- {
-     unsigned int padding;
--    unsigned int length;
-     unsigned int count;
-     unsigned char *p;
-@@ -260,7 +264,7 @@ static void dxbc_checksum_final(struct md5_ctx *ctx)
-         /* Now fill the next block */
-         memset(ctx->in, 0, DXBC_CHECKSUM_BLOCK_SIZE);
-     }
--    else
-+    else if (variant == VKD3D_MD5_DXBC)
-     {
-         /* Make place for bitcount at the beginning of the block */
-         memmove(&ctx->in[4], ctx->in, count);
-@@ -268,33 +272,44 @@ static void dxbc_checksum_final(struct md5_ctx *ctx)
-         /* Pad block to 60 bytes */
-         memset(p + 4, 0, padding - 4);
-     }
-+    else
-+    {
-+        /* Pad block to 56 bytes */
-+        memset(p, 0, padding - 8);
-+    }
-     /* Append length in bits and transform */
--    length = ctx->i[0];
--    memcpy(&ctx->in[0], &length, sizeof(length));
--    byte_reverse(&ctx->in[4], 14);
--    length = ctx->i[0] >> 2 | 0x1;
--    memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length));
-+    if (variant == VKD3D_MD5_DXBC)
-+    {
-+        unsigned int length;
-+        length = ctx->i[0];
-+        memcpy(&ctx->in[0], &length, sizeof(length));
-+        byte_reverse(&ctx->in[4], 14);
-+        length = ctx->i[0] >> 2 | 0x1;
-+        memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length));
-+    }
-+    else
-+    {
-+        byte_reverse(ctx->in, 14);
-+        ((unsigned int *)ctx->in)[14] = ctx->i[0];
-+        ((unsigned int *)ctx->in)[15] = ctx->i[1];
-+    }
-     md5_transform(ctx->buf, (unsigned int *)ctx->in);
-     byte_reverse((unsigned char *)ctx->buf, 4);
-     memcpy(ctx->digest, ctx->buf, 16);
- }
--void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4])
-+void vkd3d_compute_md5(const void *data, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant)
- {
--    const uint8_t *ptr = dxbc;
-+    const uint8_t *ptr = data;
-     struct md5_ctx ctx;
-     md5_init(&ctx);
-     md5_update(&ctx, ptr, size);
--    dxbc_checksum_final(&ctx);
-+    md5_final(&ctx, variant);
-     memcpy(checksum, ctx.digest, sizeof(ctx.digest));
- }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
-index 77e9711300f..7c5444f63a3 100644
---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
-@@ -79,7 +79,7 @@ static const char * const shader_opcode_names[] =
-     [VKD3DSIH_DCL_INDEXABLE_TEMP              ] = "dcl_indexableTemp",
-     [VKD3DSIH_DCL_INPUT                       ] = "dcl_input",
-     [VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT   ] = "dcl_input_control_point_count",
--    [VKD3DSIH_DCL_INPUT_PRIMITIVE             ] = "dcl_inputPrimitive",
-+    [VKD3DSIH_DCL_INPUT_PRIMITIVE             ] = "dcl_inputprimitive",
-     [VKD3DSIH_DCL_INPUT_PS                    ] = "dcl_input_ps",
-     [VKD3DSIH_DCL_INPUT_PS_SGV                ] = "dcl_input_ps_sgv",
-     [VKD3DSIH_DCL_INPUT_PS_SIV                ] = "dcl_input_ps_siv",
-@@ -89,7 +89,7 @@ static const char * const shader_opcode_names[] =
-     [VKD3DSIH_DCL_OUTPUT                      ] = "dcl_output",
-     [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT  ] = "dcl_output_control_point_count",
-     [VKD3DSIH_DCL_OUTPUT_SIV                  ] = "dcl_output_siv",
--    [VKD3DSIH_DCL_OUTPUT_TOPOLOGY             ] = "dcl_outputTopology",
-+    [VKD3DSIH_DCL_OUTPUT_TOPOLOGY             ] = "dcl_outputtopology",
-     [VKD3DSIH_DCL_RESOURCE_RAW                ] = "dcl_resource_raw",
-     [VKD3DSIH_DCL_RESOURCE_STRUCTURED         ] = "dcl_resource_structured",
-     [VKD3DSIH_DCL_SAMPLER                     ] = "dcl_sampler",
-@@ -104,7 +104,7 @@ static const char * const shader_opcode_names[] =
-     [VKD3DSIH_DCL_UAV_RAW                     ] = "dcl_uav_raw",
-     [VKD3DSIH_DCL_UAV_STRUCTURED              ] = "dcl_uav_structured",
-     [VKD3DSIH_DCL_UAV_TYPED                   ] = "dcl_uav_typed",
--    [VKD3DSIH_DCL_VERTICES_OUT                ] = "dcl_maxOutputVertexCount",
-+    [VKD3DSIH_DCL_VERTICES_OUT                ] = "dcl_maxout",
-     [VKD3DSIH_DDIV                            ] = "ddiv",
-     [VKD3DSIH_DEF                             ] = "def",
-     [VKD3DSIH_DEFAULT                         ] = "default",
-@@ -393,14 +393,13 @@ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type regi
-     }
- }
--static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler,
--        enum vkd3d_shader_global_flags global_flags)
-+static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags)
- {
-     unsigned int i;
-     static const struct
-     {
--        enum vkd3d_shader_global_flags flag;
-+        enum vsir_global_flags flag;
-         const char *name;
-     }
-     global_flag_info[] =
-@@ -675,9 +674,6 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum
-     {
-         [VKD3D_DATA_FLOAT    ] = "float",
-         [VKD3D_DATA_INT      ] = "int",
--        [VKD3D_DATA_RESOURCE ] = "resource",
--        [VKD3D_DATA_SAMPLER  ] = "sampler",
--        [VKD3D_DATA_UAV      ] = "uav",
-         [VKD3D_DATA_UINT     ] = "uint",
-         [VKD3D_DATA_UNORM    ] = "unorm",
-         [VKD3D_DATA_SNORM    ] = "snorm",
-@@ -1193,6 +1189,14 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
-             vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex");
-             break;
-+        case VKD3DSPR_PARAMETER:
-+            vkd3d_string_buffer_printf(buffer, "parameter");
-+            break;
-+        case VKD3DSPR_POINT_COORD:
-+            vkd3d_string_buffer_printf(buffer, "vPointCoord");
-+            break;
-         default:
-             vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s",
-                     compiler->colours.error, reg->type, compiler->colours.reset);
-@@ -1229,8 +1233,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
-                     case VKD3D_DATA_INT:
-                         shader_print_int_literal(compiler, "", reg->u.immconst_u32[0], "");
-                         break;
--                    case VKD3D_DATA_RESOURCE:
--                    case VKD3D_DATA_SAMPLER:
-                     case VKD3D_DATA_UINT:
-                         shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], "");
-                         break;
-@@ -1266,8 +1268,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
-                         shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[2], "");
-                         shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[3], "");
-                         break;
--                    case VKD3D_DATA_RESOURCE:
--                    case VKD3D_DATA_SAMPLER:
-                     case VKD3D_DATA_UINT:
-                         shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], "");
-                         shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[1], "");
-@@ -1319,6 +1319,23 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const
-         }
-         vkd3d_string_buffer_printf(buffer, ")");
-     }
-+    else if (compiler->flags & VSIR_ASM_FLAG_DUMP_ALL_INDICES)
-+    {
-+        unsigned int i = 0;
-+        if (reg->idx_count == 0 || reg->idx[0].rel_addr)
-+        {
-+            vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset);
-+        }
-+        else
-+        {
-+            vkd3d_string_buffer_printf(buffer, "%u%s", offset, compiler->colours.reset);
-+            i = 1;
-+        }
-+        for (; i < reg->idx_count; ++i)
-+            shader_print_subscript(compiler, reg->idx[i].offset, reg->idx[i].rel_addr);
-+    }
-     else if (reg->type != VKD3DSPR_RASTOUT
-             && reg->type != VKD3DSPR_MISCTYPE
-             && reg->type != VKD3DSPR_NULL
-@@ -2258,7 +2275,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic
-     }
- }
--static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler,
-+static enum vkd3d_result dump_dxbc_signature(struct vkd3d_d3d_asm_compiler *compiler,
-         const char *name, const char *register_name, const struct shader_signature *signature)
- {
-     struct vkd3d_string_buffer *buffer = &compiler->buffer;
-@@ -2325,21 +2342,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler,
-     return VKD3D_OK;
- }
--static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler,
-+static enum vkd3d_result dump_dxbc_signatures(struct vkd3d_d3d_asm_compiler *compiler,
-         const struct vsir_program *program)
- {
-     enum vkd3d_result ret;
--    if ((ret = dump_signature(compiler, ".input",
-+    if ((ret = dump_dxbc_signature(compiler, ".input",
-             program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v",
-             &program->input_signature)) < 0)
-         return ret;
--    if ((ret = dump_signature(compiler, ".output", "o",
-+    if ((ret = dump_dxbc_signature(compiler, ".output", "o",
-             &program->output_signature)) < 0)
-         return ret;
--    if ((ret = dump_signature(compiler, ".patch_constant",
-+    if ((ret = dump_dxbc_signature(compiler, ".patch_constant",
-             program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o",
-             &program->patch_constant_signature)) < 0)
-         return ret;
-@@ -2427,7 +2444,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program,
-      * doesn't even have an explicit concept of signature. */
-     if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4)
-     {
--        if ((result = dump_signatures(&compiler, program)) < 0)
-+        if ((result = dump_dxbc_signatures(&compiler, program)) < 0)
-         {
-             vkd3d_string_buffer_cleanup(buffer);
-             return result;
-@@ -2489,12 +2506,58 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program,
-     return result;
- }
--void vkd3d_shader_trace(const struct vsir_program *program)
-+/* This is meant exclusively for development use. Therefore, differently from
-+ * dump_dxbc_signature(), it doesn't try particularly hard to make the output
-+ * nice or easily parsable, and it dumps all fields, not just the DXBC ones.
-+ * This format isn't meant to be stable. */
-+static void trace_signature(const struct shader_signature *signature, const char *signature_type)
- {
--    const char *p, *q, *end;
-+    struct vkd3d_string_buffer buffer;
-+    unsigned int i;
-+    TRACE("%s signature:%s\n", signature_type, signature->element_count == 0 ? " empty" : "");
-+    vkd3d_string_buffer_init(&buffer);
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        const struct signature_element *element = &signature->elements[i];
-+        vkd3d_string_buffer_clear(&buffer);
-+        vkd3d_string_buffer_printf(&buffer, "Element %u: %s %u-%u %s", i,
-+                get_component_type_name(element->component_type),
-+                element->register_index, element->register_index + element->register_count,
-+                element->semantic_name);
-+        if (element->semantic_index != -1)
-+            vkd3d_string_buffer_printf(&buffer, "%u", element->semantic_index);
-+        vkd3d_string_buffer_printf(&buffer,
-+                " mask %#x used_mask %#x sysval %s min_precision %s interpolation %u stream %u",
-+                element->mask, element->used_mask, get_sysval_semantic_name(element->sysval_semantic),
-+                get_minimum_precision_name(element->min_precision), element->interpolation_mode,
-+                element->stream_index);
-+        if (element->target_location != -1)
-+            vkd3d_string_buffer_printf(&buffer, " target %u", element->target_location);
-+        else
-+            vkd3d_string_buffer_printf(&buffer, " unused");
-+        TRACE("%s\n", buffer.buffer);
-+    }
-+    vkd3d_string_buffer_cleanup(&buffer);
-+void vsir_program_trace(const struct vsir_program *program)
-+    const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES;
-     struct vkd3d_shader_code code;
-+    const char *p, *q, *end;
-+    trace_signature(&program->input_signature, "Input");
-+    trace_signature(&program->output_signature, "Output");
-+    trace_signature(&program->patch_constant_signature, "Patch-constant");
--    if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK)
-+    if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK)
-         return;
-     end = (const char *)code.code + code.size;
-diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
-index d05394c3ab7..bda9bc72f56 100644
---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
-@@ -104,6 +104,12 @@ enum vkd3d_sm1_resource_type
-     VKD3D_SM1_RESOURCE_TEXTURE_3D   = 0x4,
- };
-+enum vkd3d_sm1_misc_register
-+    VKD3D_SM1_MISC_POSITION         = 0x0,
-+    VKD3D_SM1_MISC_FACE             = 0x1,
- enum vkd3d_sm1_opcode
- {
-     VKD3D_SM1_OP_NOP          = 0x00,
-@@ -444,17 +450,36 @@ static uint32_t swizzle_from_sm1(uint32_t swizzle)
-             shader_sm1_get_swizzle_component(swizzle, 3));
- }
-+/* D3DBC doesn't have the concept of index count. All registers implicitly have
-+ * exactly one index. However for some register types the index doesn't make
-+ * sense, so we remove it. */
-+static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_type)
-+    switch (reg_type)
-+    {
-+        case VKD3DSPR_DEPTHOUT:
-+            return 0;
-+        default:
-+            return 1;
-+    }
- static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr,
-         struct vkd3d_shader_src_param *src)
- {
-     enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT)
-             | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2);
-+    unsigned int idx_count = idx_count_from_reg_type(reg_type);
--    vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, 1);
-+    vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count);
-     src->reg.non_uniform = false;
--    src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
--    src->reg.idx[0].rel_addr = rel_addr;
-+    if (idx_count == 1)
-+    {
-+        src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
-+        src->reg.idx[0].rel_addr = rel_addr;
-+    }
-     if (src->reg.type == VKD3DSPR_SAMPLER)
-         src->reg.dimension = VSIR_DIMENSION_NONE;
-     else if (src->reg.type == VKD3DSPR_DEPTHOUT)
-@@ -470,12 +495,16 @@ static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_p
- {
-     enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT)
-             | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2);
-+    unsigned int idx_count = idx_count_from_reg_type(reg_type);
--    vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, 1);
-+    vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count);
-     dst->reg.non_uniform = false;
--    dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
--    dst->reg.idx[0].rel_addr = rel_addr;
-+    if (idx_count == 1)
-+    {
-+        dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
-+        dst->reg.idx[0].rel_addr = rel_addr;
-+    }
-     if (dst->reg.type == VKD3DSPR_SAMPLER)
-         dst->reg.dimension = VSIR_DIMENSION_NONE;
-     else if (dst->reg.type == VKD3DSPR_DEPTHOUT)
-@@ -532,6 +561,21 @@ static struct signature_element *find_signature_element_by_register_index(
-     return NULL;
- }
-+/* Add missing bits to a mask to make it contiguous. */
-+static unsigned int make_mask_contiguous(unsigned int mask)
-+    static const unsigned int table[] =
-+    {
-+        0x0, 0x1, 0x2, 0x3,
-+        0x4, 0x7, 0x6, 0x7,
-+        0x8, 0xf, 0xe, 0xf,
-+        0xc, 0xf, 0xe, 0xf,
-+    };
-+    VKD3D_ASSERT(mask < ARRAY_SIZE(table));
-+    return table[mask];
- static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output,
-         const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval,
-         unsigned int register_index, bool is_dcl, unsigned int mask)
-@@ -547,7 +591,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp
-     if ((element = find_signature_element(signature, name, index)))
-     {
--        element->mask |= mask;
-+        element->mask = make_mask_contiguous(element->mask | mask);
-         if (!is_dcl)
-             element->used_mask |= mask;
-         return true;
-@@ -567,7 +611,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp
-     element->register_index = register_index;
-     element->target_location = register_index;
-     element->register_count = 1;
--    element->mask = mask;
-+    element->mask = make_mask_contiguous(mask);
-     element->used_mask = is_dcl ? 0 : mask;
-     if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output)
-         element->interpolation_mode = VKD3DSIM_LINEAR;
-@@ -601,7 +645,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *
-         const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask)
- {
-     const struct vkd3d_shader_version *version = &sm1->p.program->shader_version;
--    unsigned int register_index = reg->idx[0].offset;
-+    unsigned int register_index = reg->idx_count > 0 ? reg->idx[0].offset : 0;
-     switch (reg->type)
-     {
-@@ -921,6 +965,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const
-         shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr);
-     }
-     shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param);
-+    if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE)
-+        sm1->p.program->has_point_size = true;
- }
- static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1,
-@@ -1272,7 +1319,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st
-     sm1->end = &code[token_count];
-     /* Estimate instruction count to avoid reallocation in most shaders. */
--    if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16))
-+    if (!vsir_program_init(program, compile_info, &version,
-+            code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
-         return VKD3D_ERROR_OUT_OF_MEMORY;
-     vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name);
-@@ -1338,23 +1386,19 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c
-     for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i)
-         program->flat_constant_count[i] = get_external_constant_count(&sm1, i);
--    if (!sm1.p.failed)
--        ret = vkd3d_shader_parser_validate(&sm1.p, config_flags);
-     if (sm1.p.failed && ret >= 0)
-     if (ret < 0)
-     {
--        WARN("Failed to parse shader.\n");
-         vsir_program_cleanup(program);
-         return ret;
-     }
--    return ret;
-+    return VKD3D_OK;
- }
--bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name,
-+bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name,
-         unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg)
- {
-     unsigned int i;
-@@ -1384,22 +1428,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version,
-         {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT},
-         {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT},
-         {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT},
--        {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE,    D3DSMO_POSITION},
--        {"vface",       false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE,    D3DSMO_FACE},
--        {"vpos",        false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE,    D3DSMO_POSITION},
-+        {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE,     VKD3D_SM1_MISC_POSITION},
-+        {"vface",       false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE,     VKD3D_SM1_MISC_FACE},
-+        {"vpos",        false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE,     VKD3D_SM1_MISC_POSITION},
-         {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT},
--        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     D3DSRO_FOG},
--        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     D3DSRO_POSITION},
--        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     D3DSRO_POINT_SIZE},
--        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     D3DSRO_POSITION},
-+        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_FOG},
-+        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_POSITION},
-+        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_POINT_SIZE},
-+        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_POSITION},
-         {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT},
-         {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT},
--        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     D3DSRO_FOG},
--        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     D3DSRO_POSITION},
--        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     D3DSRO_POINT_SIZE},
--        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     D3DSRO_POSITION},
-+        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_FOG},
-+        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_POSITION},
-+        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_POINT_SIZE},
-+        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT,     VSIR_RASTOUT_POSITION},
-         {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT},
-     };
-@@ -1422,33 +1466,33 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version,
-     return false;
- }
--bool hlsl_sm1_usage_from_semantic(const char *semantic_name,
--        uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx)
-+bool sm1_usage_from_semantic_name(const char *semantic_name,
-+        uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx)
- {
-     static const struct
-     {
-         const char *name;
--        D3DDECLUSAGE usage;
-+        enum vkd3d_decl_usage usage;
-     }
-     semantics[] =
-     {
--        {"binormal",        D3DDECLUSAGE_BINORMAL},
--        {"blendindices",    D3DDECLUSAGE_BLENDINDICES},
--        {"blendweight",     D3DDECLUSAGE_BLENDWEIGHT},
--        {"color",           D3DDECLUSAGE_COLOR},
--        {"depth",           D3DDECLUSAGE_DEPTH},
--        {"fog",             D3DDECLUSAGE_FOG},
--        {"normal",          D3DDECLUSAGE_NORMAL},
--        {"position",        D3DDECLUSAGE_POSITION},
--        {"positiont",       D3DDECLUSAGE_POSITIONT},
--        {"psize",           D3DDECLUSAGE_PSIZE},
--        {"sample",          D3DDECLUSAGE_SAMPLE},
--        {"sv_depth",        D3DDECLUSAGE_DEPTH},
--        {"sv_position",     D3DDECLUSAGE_POSITION},
--        {"sv_target",       D3DDECLUSAGE_COLOR},
--        {"tangent",         D3DDECLUSAGE_TANGENT},
--        {"tessfactor",      D3DDECLUSAGE_TESSFACTOR},
--        {"texcoord",        D3DDECLUSAGE_TEXCOORD},
-+        {"binormal",        VKD3D_DECL_USAGE_BINORMAL},
-+        {"blendindices",    VKD3D_DECL_USAGE_BLEND_INDICES},
-+        {"blendweight",     VKD3D_DECL_USAGE_BLEND_WEIGHT},
-+        {"color",           VKD3D_DECL_USAGE_COLOR},
-+        {"depth",           VKD3D_DECL_USAGE_DEPTH},
-+        {"fog",             VKD3D_DECL_USAGE_FOG},
-+        {"normal",          VKD3D_DECL_USAGE_NORMAL},
-+        {"position",        VKD3D_DECL_USAGE_POSITION},
-+        {"positiont",       VKD3D_DECL_USAGE_POSITIONT},
-+        {"psize",           VKD3D_DECL_USAGE_PSIZE},
-+        {"sample",          VKD3D_DECL_USAGE_SAMPLE},
-+        {"sv_depth",        VKD3D_DECL_USAGE_DEPTH},
-+        {"sv_position",     VKD3D_DECL_USAGE_POSITION},
-+        {"sv_target",       VKD3D_DECL_USAGE_COLOR},
-+        {"tangent",         VKD3D_DECL_USAGE_TANGENT},
-+        {"tessfactor",      VKD3D_DECL_USAGE_TESS_FACTOR},
-+        {"texcoord",        VKD3D_DECL_USAGE_TEXCOORD},
-     };
-     unsigned int i;
-@@ -1468,21 +1512,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name,
- struct d3dbc_compiler
- {
-+    const struct vkd3d_sm1_opcode_info *opcode_table;
-     struct vsir_program *program;
-     struct vkd3d_bytecode_buffer buffer;
-     struct vkd3d_shader_message_context *message_context;
--    /* OBJECTIVE: Store all the required information in the other fields so
--     * that this hlsl_ctx is no longer necessary. */
--    struct hlsl_ctx *ctx;
-+    bool failed;
- };
- static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor)
- {
--    if (type == VKD3D_SHADER_TYPE_VERTEX)
--        return D3DVS_VERSION(major, minor);
--    else
--        return D3DPS_VERSION(major, minor);
-+    return vkd3d_make_u32(vkd3d_make_u16(minor, major),
-+            type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS);
- }
- D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
-@@ -1512,6 +1552,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_PASS:
-@@ -1524,6 +1565,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
-         case HLSL_CLASS_HULL_SHADER:
-         case HLSL_CLASS_BLEND_STATE:
-         case HLSL_CLASS_NULL:
-             break;
-     }
-@@ -1617,6 +1659,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type)
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_PASS:
-@@ -1629,6 +1672,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type)
-         case HLSL_CLASS_HULL_SHADER:
-         case HLSL_CLASS_BLEND_STATE:
-         case HLSL_CLASS_NULL:
-             break;
-     }
-@@ -1709,7 +1753,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx)
- void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
- {
--    size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset;
-+    size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset;
-     unsigned int uniform_count = 0;
-     struct hlsl_ir_var *var;
-@@ -1741,15 +1785,16 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff
-     size_offset = put_u32(buffer, 0);
-     ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B'));
--    ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE));
-+    ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */
-     creator_offset = put_u32(buffer, 0);
-     put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
-     put_u32(buffer, uniform_count);
--    put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */
-+    vars_offset = put_u32(buffer, 0);
-     put_u32(buffer, 0); /* FIXME: flags */
-     put_u32(buffer, 0); /* FIXME: target string */
-     vars_start = bytecode_align(buffer);
-+    set_u32(buffer, vars_offset, vars_start - ctab_start);
-     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-     {
-@@ -1825,8 +1870,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff
-                         switch (comp_type->e.numeric.type)
-                         {
-                             case HLSL_TYPE_DOUBLE:
--                                hlsl_fixme(ctx, &var->loc, "Write double default values.");
--                                uni.u = 0;
-+                                if (ctx->double_as_float_alias)
-+                                    uni.u = var->default_values[k].number.u;
-+                                else
-+                                    uni.u = 0;
-                                 break;
-                             case HLSL_TYPE_INT:
-@@ -1860,24 +1907,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff
-     set_u32(buffer, creator_offset, offset - ctab_start);
-     ctab_end = bytecode_align(buffer);
--    set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t)));
-+    set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t)));
- }
- static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type)
- {
--    return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK)
--            | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2);
-+            | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2);
- }
- struct sm1_instruction
- {
-+    enum vkd3d_sm1_opcode opcode;
-     unsigned int flags;
-     struct sm1_dst_register
-     {
-         enum vkd3d_shader_register_type type;
-+        enum vkd3d_shader_dst_modifier mod;
-         unsigned int writemask;
-         uint32_t reg;
-     } dst;
-@@ -1885,7 +1932,7 @@ struct sm1_instruction
-     struct sm1_src_register
-     {
-         enum vkd3d_shader_register_type type;
-+        enum vkd3d_shader_src_modifier mod;
-         unsigned int swizzle;
-         uint32_t reg;
-     } srcs[4];
-@@ -1900,11 +1947,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr)
-     const struct sm1_dst_register *dst = &instr->dst;
-     unsigned int i;
--    if (instr->opcode != D3DSIO_MOV)
-+    if (instr->opcode != VKD3D_SM1_OP_MOV)
-         return false;
--    if (dst->mod != D3DSPDM_NONE)
-+    if (dst->mod != VKD3DSPDM_NONE)
-         return false;
--    if (src->mod != D3DSPSM_NONE)
-+    if (src->mod != VKD3DSPSM_NONE)
-         return false;
-     if (src->type != dst->type)
-         return false;
-@@ -1923,13 +1970,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr)
- static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg)
- {
-     VKD3D_ASSERT(reg->writemask);
--    put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg);
-+    put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER
-+            | sm1_encode_register_type(reg->type)
-+            | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT)
-+            | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg);
- }
- static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer,
-         const struct sm1_src_register *reg)
- {
--    put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg);
-+    put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER
-+            | sm1_encode_register_type(reg->type)
-+            | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT)
-+            | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg);
- }
- static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr)
-@@ -1945,7 +1998,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s
-     if (version->major > 1)
--        token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT;
-+        token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
-     put_u32(buffer, token);
-     if (instr->has_dst)
-@@ -1955,346 +2008,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s
-         write_sm1_src_register(buffer, &instr->srcs[i]);
- };
--static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask)
--    src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask);
--static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst,
--        const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3)
-+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir(
-+        struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode)
- {
--    struct sm1_instruction instr =
--    {
--        .opcode = D3DSIO_DP2ADD,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.writemask = dst->writemask,
--        .dst.reg = dst->id,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
--        .srcs[0].reg = src1->id,
--        .srcs[1].type = VKD3DSPR_TEMP,
--        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
--        .srcs[1].reg = src2->id,
--        .srcs[2].type = VKD3DSPR_TEMP,
--        .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask),
--        .srcs[2].reg = src3->id,
--        .src_count = 3,
--    };
--    d3dbc_write_instruction(d3dbc, &instr);
--static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc,
--        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
--        const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3)
--    struct sm1_instruction instr =
--    {
--        .opcode = opcode,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.writemask = dst->writemask,
--        .dst.reg = dst->id,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
--        .srcs[0].reg = src1->id,
--        .srcs[1].type = VKD3DSPR_TEMP,
--        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
--        .srcs[1].reg = src2->id,
--        .srcs[2].type = VKD3DSPR_TEMP,
--        .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask),
--        .srcs[2].reg = src3->id,
--        .src_count = 3,
--    };
--    sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask);
--    sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask);
--    sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &instr);
--static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode,
--        const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2)
--    struct sm1_instruction instr =
--    {
--        .opcode = opcode,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.writemask = dst->writemask,
--        .dst.reg = dst->id,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
--        .srcs[0].reg = src1->id,
--        .srcs[1].type = VKD3DSPR_TEMP,
--        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
--        .srcs[1].reg = src2->id,
--        .src_count = 2,
--    };
--    sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask);
--    sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &instr);
-+    const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
-+    const struct vkd3d_sm1_opcode_info *info;
-+    unsigned int i = 0;
--static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode,
--        const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2)
--    struct sm1_instruction instr =
-+    for (;;)
-     {
--        .opcode = opcode,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.writemask = dst->writemask,
--        .dst.reg = dst->id,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask),
--        .srcs[0].reg = src1->id,
--        .srcs[1].type = VKD3DSPR_TEMP,
--        .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask),
--        .srcs[1].reg = src2->id,
--        .src_count = 2,
--    };
-+        info = &d3dbc->opcode_table[i++];
-+        if (info->vkd3d_opcode == VKD3DSIH_INVALID)
-+            return NULL;
--    d3dbc_write_instruction(d3dbc, &instr);
-+        if (vkd3d_opcode == info->vkd3d_opcode
-+                && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor)
-+                && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor)
-+                        || !info->max_version.major))
-+            return info;
-+    }
- }
--static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode,
--        const struct hlsl_reg *dst, const struct hlsl_reg *src,
-+static uint32_t swizzle_from_vsir(uint32_t swizzle)
- {
--    struct sm1_instruction instr =
--    {
--        .opcode = opcode,
-+    uint32_t x = vsir_swizzle_get_component(swizzle, 0);
-+    uint32_t y = vsir_swizzle_get_component(swizzle, 1);
-+    uint32_t z = vsir_swizzle_get_component(swizzle, 2);
-+    uint32_t w = vsir_swizzle_get_component(swizzle, 3);
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.mod = dst_mod,
--        .dst.writemask = dst->writemask,
--        .dst.reg = dst->id,
--        .has_dst = 1,
-+    if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u)
-+        ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle);
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask),
--        .srcs[0].reg = src->id,
--        .srcs[0].mod = src_mod,
--        .src_count = 1,
--    };
--    sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &instr);
-+    return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0))
-+            | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1))
-+            | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2))
-+            | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3));
- }
--static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-+static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_src_param *param,
-+        struct sm1_src_register *src, const struct vkd3d_shader_location *loc)
- {
--    struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
--    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
--    const struct hlsl_type *dst_type = expr->node.data_type;
--    const struct hlsl_type *src_type = arg1->data_type;
--    struct hlsl_ctx *ctx = d3dbc->ctx;
-+    src->mod = param->modifiers;
-+    src->reg = param->reg.idx[0].offset;
-+    src->type = param->reg.type;
-+    src->swizzle = swizzle_from_vsir(param->swizzle);
--    /* Narrowing casts were already lowered. */
--    VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
--    switch (dst_type->e.numeric.type)
-+    if (param->reg.idx[0].rel_addr)
-     {
--        case HLSL_TYPE_HALF:
--        case HLSL_TYPE_FLOAT:
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                case HLSL_TYPE_BOOL:
--                    /* Integrals are internally represented as floats, so no change is necessary.*/
--                case HLSL_TYPE_HALF:
--                case HLSL_TYPE_FLOAT:
--                    d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0);
--                    break;
--                case HLSL_TYPE_DOUBLE:
--                    hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float.");
--                    break;
--                default:
--                    vkd3d_unreachable();
--            }
--            break;
--        case HLSL_TYPE_INT:
--        case HLSL_TYPE_UINT:
--            switch(src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_HALF:
--                case HLSL_TYPE_FLOAT:
--                    /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not
--                     * reach this case unless we are missing something. */
--                    hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer.");
--                    break;
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0);
--                    break;
--                case HLSL_TYPE_BOOL:
--                    hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer.");
--                    break;
--                case HLSL_TYPE_DOUBLE:
--                    hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer.");
--                    break;
--                default:
--                    vkd3d_unreachable();
--            }
--            break;
--        case HLSL_TYPE_DOUBLE:
--            hlsl_fixme(ctx, &instr->loc, "SM1 cast to double.");
--            break;
--        case HLSL_TYPE_BOOL:
--            /* Casts to bool should have already been lowered. */
--        default:
--            hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.",
--                debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type));
--            break;
-+        vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
-+                "Unhandled relative addressing on source register.");
-+        d3dbc->failed = true;
-     }
- }
--static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc)
-+static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param,
-+        struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc)
- {
--    const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
--    struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    unsigned int i, x;
-+    dst->mod = param->modifiers;
-+    dst->reg = param->reg.idx[0].offset;
-+    dst->type = param->reg.type;
-+    dst->writemask = param->write_mask;
--    for (i = 0; i < ctx->constant_defs.count; ++i)
-+    if (param->reg.idx[0].rel_addr)
-     {
--        const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i];
--        uint32_t token = D3DSIO_DEF;
--        const struct sm1_dst_register reg =
--        {
--            .type = VKD3DSPR_CONST,
--            .writemask = VKD3DSP_WRITEMASK_ALL,
--            .reg = constant_reg->index,
--        };
--        if (version->major > 1)
--            token |= 5 << D3DSI_INSTLENGTH_SHIFT;
--        put_u32(buffer, token);
--        write_sm1_dst_register(buffer, &reg);
--        for (x = 0; x < 4; ++x)
--            put_f32(buffer, constant_reg->value.f[x]);
-+        vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
-+                "Unhandled relative addressing on destination register.");
-+        d3dbc->failed = true;
-     }
- }
--static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
--        const struct signature_element *element, bool output)
-+static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
- {
-     const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
-     struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
--    struct sm1_dst_register reg = {0};
--    uint32_t token, usage_idx;
--    D3DDECLUSAGE usage;
--    bool ret;
-+    uint32_t token;
--    if (hlsl_sm1_register_from_semantic(version, element->semantic_name,
--            element->semantic_index, output, &reg.type, &reg.reg))
--    {
--        usage = 0;
--        usage_idx = 0;
--    }
--    else
-+    const struct sm1_dst_register reg =
-     {
--        ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx);
--        VKD3D_ASSERT(ret);
--        reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
--        reg.reg = element->register_index;
--    }
-+        .type = VKD3DSPR_CONST,
-+        .writemask = VKD3DSP_WRITEMASK_ALL,
-+        .reg = ins->dst[0].reg.idx[0].offset,
-+    };
--    token = D3DSIO_DCL;
-+    token = VKD3D_SM1_OP_DEF;
-     if (version->major > 1)
--        token |= 2 << D3DSI_INSTLENGTH_SHIFT;
-+        token |= 5 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
-     put_u32(buffer, token);
--    token = (1u << 31);
--    token |= usage << D3DSP_DCL_USAGE_SHIFT;
--    token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT;
--    put_u32(buffer, token);
--    reg.writemask = element->mask;
-     write_sm1_dst_register(buffer, &reg);
-+    for (unsigned int x = 0; x < 4; ++x)
-+        put_f32(buffer, ins->src[0].reg.u.immconst_f32[x]);
- }
--static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc)
--    struct vsir_program *program = d3dbc->program;
--    const struct vkd3d_shader_version *version;
--    bool write_in = false, write_out = false;
--    version = &program->shader_version;
--    if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2)
--        write_in = true;
--    else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3)
--        write_in = write_out = true;
--    else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3)
--        write_in = true;
--    if (write_in)
--    {
--        for (unsigned int i = 0; i < program->input_signature.element_count; ++i)
--            d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false);
--    }
--    if (write_out)
--    {
--        for (unsigned int i = 0; i < program->output_signature.element_count; ++i)
--            d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true);
--    }
--static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc,
--        unsigned int reg_id, enum hlsl_sampler_dim sampler_dim)
-+static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc,
-+        unsigned int reg_id, enum vkd3d_sm1_resource_type res_type)
- {
-     const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
-     struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
-     struct sm1_dst_register reg = {0};
--    uint32_t token, res_type = 0;
-+    uint32_t token;
--    token = D3DSIO_DCL;
-+    token = VKD3D_SM1_OP_DCL;
-     if (version->major > 1)
--        token |= 2 << D3DSI_INSTLENGTH_SHIFT;
-+        token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
-     put_u32(buffer, token);
--    switch (sampler_dim)
--    {
--        case HLSL_SAMPLER_DIM_2D:
--            res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D;
--            break;
--        case HLSL_SAMPLER_DIM_CUBE:
--            res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE;
--            break;
--        case HLSL_SAMPLER_DIM_3D:
--            res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D;
--            break;
--        default:
--            vkd3d_unreachable();
--            break;
--    }
--    token = (1u << 31);
-     token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT;
-     put_u32(buffer, token);
-@@ -2305,618 +2124,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc,
-     write_sm1_dst_register(buffer, &reg);
- }
--static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc)
-+static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
- {
-     const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    enum hlsl_sampler_dim sampler_dim;
--    unsigned int i, count, reg_id;
--    struct hlsl_ir_var *var;
-+    const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic;
-+    unsigned int reg_id;
-     if (version->major < 2)
-         return;
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
--    {
--        if (!var->regs[HLSL_REGSET_SAMPLERS].allocated)
--            continue;
--        count = var->bind_count[HLSL_REGSET_SAMPLERS];
--        for (i = 0; i < count; ++i)
--        {
--            if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
--            {
--                sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim;
--                if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
--                {
--                    /* These can appear in sm4-style combined sample instructions. */
--                    hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered.");
--                    continue;
--                }
--                reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i;
--                d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim);
--            }
--        }
--    }
-+    reg_id = semantic->resource.reg.reg.idx[0].offset;
--static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
--    const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
--    struct sm1_instruction sm1_instr =
-+    if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER)
-     {
--        .opcode = D3DSIO_MOV,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.reg = instr->reg.id,
--        .dst.writemask = instr->reg.writemask,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_CONST,
--        .srcs[0].reg = constant->reg.id,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask),
--        .src_count = 1,
--    };
--    VKD3D_ASSERT(instr->reg.allocated);
--    VKD3D_ASSERT(constant->reg.allocated);
--    sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &sm1_instr);
--static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc,
--        const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode)
--    struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
--    struct hlsl_ir_node *arg1 = expr->operands[0].node;
--    unsigned int i;
--    for (i = 0; i < instr->data_type->dimx; ++i)
--    {
--        struct hlsl_reg src = arg1->reg, dst = instr->reg;
--        src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i);
--        dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i);
--        d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0);
--    }
--static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op,
--        const struct hlsl_reg *dst, const struct hlsl_reg *src)
--    struct sm1_instruction instr =
--    {
--        .opcode = D3DSIO_SINCOS,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.writemask = dst->writemask,
--        .dst.reg = dst->id,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask),
--        .srcs[0].reg = src->id,
--        .src_count = 1,
--    };
--    if (op == HLSL_OP1_COS_REDUCED)
--        VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0);
--    else /* HLSL_OP1_SIN_REDUCED */
--        VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1);
--    if (d3dbc->ctx->profile->major_version < 3)
--    {
--        instr.src_count = 3;
--        instr.srcs[1].type = VKD3DSPR_CONST;
--        instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL);
--        instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id;
--        instr.srcs[2].type = VKD3DSPR_CONST;
--        instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL);
--        instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id;
--    }
--    d3dbc_write_instruction(d3dbc, &instr);
--static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
--    const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
--    struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
--    struct hlsl_ir_node *arg1 = expr->operands[0].node;
--    struct hlsl_ir_node *arg2 = expr->operands[1].node;
--    struct hlsl_ir_node *arg3 = expr->operands[2].node;
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    VKD3D_ASSERT(instr->reg.allocated);
--    if (expr->op == HLSL_OP1_REINTERPRET)
--    {
--        d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0);
-+        vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE,
-+                "dcl instruction with register type %u.", semantic->resource.reg.reg.type);
-+        d3dbc->failed = true;
-         return;
-     }
--    if (expr->op == HLSL_OP1_CAST)
-+    switch (semantic->resource_type)
-     {
--        d3dbc_write_cast(d3dbc, instr);
--        return;
--    }
--    if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT)
--    {
--        /* These need to be lowered. */
--        hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression.");
--        return;
--    }
--    switch (expr->op)
--    {
--        case HLSL_OP1_ABS:
--            d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0);
-+            d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D);
-             break;
--        case HLSL_OP1_DSX:
--            d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0);
-+            d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE);
-             break;
--        case HLSL_OP1_DSY:
--            d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0);
--            break;
--        case HLSL_OP1_EXP2:
--            d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP);
--            break;
--        case HLSL_OP1_LOG2:
--            d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG);
--            break;
--        case HLSL_OP1_NEG:
--            d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0);
--            break;
--        case HLSL_OP1_SAT:
--            d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE);
--            break;
--        case HLSL_OP1_RCP:
--            d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP);
--            break;
--        case HLSL_OP1_RSQ:
--            d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ);
--            break;
--        case HLSL_OP1_COS_REDUCED:
--        case HLSL_OP1_SIN_REDUCED:
--            d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg);
--            break;
--        case HLSL_OP2_ADD:
--            d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP2_MAX:
--            d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP2_MIN:
--            d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP2_MUL:
--            d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP1_FRACT:
--            d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0);
--            break;
--        case HLSL_OP2_DOT:
--            switch (arg1->data_type->dimx)
--            {
--                case 4:
--                    d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg);
--                    break;
--                case 3:
--                    d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg);
--                    break;
--                default:
--                    vkd3d_unreachable();
--            }
--            break;
--        case HLSL_OP2_LOGIC_AND:
--            d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP2_LOGIC_OR:
--            d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP2_SLT:
--            if (version->type == VKD3D_SHADER_TYPE_PIXEL)
--                hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders.");
--            d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg);
--            break;
--        case HLSL_OP3_CMP:
--            if (version->type == VKD3D_SHADER_TYPE_VERTEX)
--                hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders.");
--            d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
--            break;
--        case HLSL_OP3_DP2ADD:
--            d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
--            break;
--        case HLSL_OP3_MAD:
--            d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
-+            d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_3D);
-             break;
-         default:
--            hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op));
--            break;
--    }
--static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block);
--static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
--    const struct hlsl_ir_if *iff = hlsl_ir_if(instr);
--    const struct hlsl_ir_node *condition;
--    struct sm1_instruction sm1_ifc, sm1_else, sm1_endif;
--    condition = iff->condition.node;
--    VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1);
--    sm1_ifc = (struct sm1_instruction)
--    {
--        .opcode = D3DSIO_IFC,
--        .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask),
--        .srcs[0].reg = condition->reg.id,
--        .srcs[0].mod = 0,
--        .srcs[1].type = VKD3DSPR_TEMP,
--        .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask),
--        .srcs[1].reg = condition->reg.id,
--        .srcs[1].mod = D3DSPSM_NEG,
--        .src_count = 2,
--    };
--    d3dbc_write_instruction(d3dbc, &sm1_ifc);
--    d3dbc_write_block(d3dbc, &iff->then_block);
--    if (!list_empty(&iff->else_block.instrs))
--    {
--        sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE};
--        d3dbc_write_instruction(d3dbc, &sm1_else);
--        d3dbc_write_block(d3dbc, &iff->else_block);
-+            vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE,
-+                   "dcl instruction with resource_type %u.", semantic->resource_type);
-+            d3dbc->failed = true;
-+            return;
-     }
--    sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF};
--    d3dbc_write_instruction(d3dbc, &sm1_endif);
- }
--static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction(
-+        struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
- {
--    const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
-+    const struct vkd3d_sm1_opcode_info *info;
--    switch (jump->type)
-+    if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode)))
-     {
--        case HLSL_IR_JUMP_DISCARD_NEG:
--        {
--            struct hlsl_reg *reg = &jump->condition.node->reg;
--            struct sm1_instruction sm1_instr =
--            {
--                .opcode = D3DSIO_TEXKILL,
--                .dst.type = VKD3DSPR_TEMP,
--                .dst.reg = reg->id,
--                .dst.writemask = reg->writemask,
--                .has_dst = 1,
--            };
--            d3dbc_write_instruction(d3dbc, &sm1_instr);
--            break;
--        }
--        default:
--            hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
-+        vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE,
-+                "Opcode %#x not supported for shader profile.", ins->opcode);
-+        d3dbc->failed = true;
-+        return NULL;
-     }
--static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
--    const struct hlsl_ir_load *load = hlsl_ir_load(instr);
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src);
--    struct sm1_instruction sm1_instr =
--    {
--        .opcode = D3DSIO_MOV,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.reg = instr->reg.id,
--        .dst.writemask = instr->reg.writemask,
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].reg = reg.id,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask),
--        .src_count = 1,
--    };
--    VKD3D_ASSERT(instr->reg.allocated);
--    if (load->src.var->is_uniform)
-+    if (ins->dst_count != info->dst_count)
-     {
--        VKD3D_ASSERT(reg.allocated);
--        sm1_instr.srcs[0].type = VKD3DSPR_CONST;
-+        vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT,
-+                "Invalid destination count %u for vsir instruction %#x (expected %u).",
-+                ins->dst_count, ins->opcode, info->dst_count);
-+        d3dbc->failed = true;
-+        return NULL;
-     }
--    else if (load->src.var->is_input_semantic)
-+    if (ins->src_count != info->src_count)
-     {
--        if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name,
--                load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg))
--        {
--            VKD3D_ASSERT(reg.allocated);
--            sm1_instr.srcs[0].type = VKD3DSPR_INPUT;
--            sm1_instr.srcs[0].reg = reg.id;
--        }
--        else
--            sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1);
-+        vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT,
-+                "Invalid source count %u for vsir instruction %#x (expected %u).",
-+                ins->src_count, ins->opcode, info->src_count);
-+        d3dbc->failed = true;
-+        return NULL;
-     }
--    sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &sm1_instr);
-+    return info;
- }
--static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-+static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc,
-+        const struct vkd3d_shader_instruction *ins)
- {
--    const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
--    struct hlsl_ir_node *coords = load->coords.node;
--    struct hlsl_ir_node *ddx = load->ddx.node;
--    struct hlsl_ir_node *ddy = load->ddy.node;
--    unsigned int sampler_offset, reg_id;
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    struct sm1_instruction sm1_instr;
-+    struct sm1_instruction instr = {0};
-+    const struct vkd3d_sm1_opcode_info *info;
--    sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource);
--    reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset;
-+    if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins)))
-+        return;
--    sm1_instr = (struct sm1_instruction)
--    {
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.reg = instr->reg.id,
--        .dst.writemask = instr->reg.writemask,
--        .has_dst = 1,
-+    instr.opcode = info->sm1_opcode;
-+    instr.flags = ins->flags;
-+    instr.has_dst = info->dst_count;
-+    instr.src_count = info->src_count;
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].reg = coords->reg.id,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask),
-+    if (instr.has_dst)
-+        sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location);
-+    for (unsigned int i = 0; i < instr.src_count; ++i)
-+        sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location);
--        .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER,
--        .srcs[1].reg = reg_id,
--        .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL),
-+    d3dbc_write_instruction(d3dbc, &instr);
--        .src_count = 2,
--    };
-+static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
-+    uint32_t writemask;
--    switch (load->load_type)
-+    switch (ins->opcode)
-     {
--        case HLSL_RESOURCE_SAMPLE:
--            sm1_instr.opcode = D3DSIO_TEX;
-+        case VKD3DSIH_DEF:
-+            d3dbc_write_vsir_def(d3dbc, ins);
-             break;
--            sm1_instr.opcode = D3DSIO_TEX;
--            sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
-+        case VKD3DSIH_DCL:
-+            d3dbc_write_vsir_dcl(d3dbc, ins);
-             break;
--            sm1_instr.opcode = D3DSIO_TEX;
--            sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
-+        case VKD3DSIH_ABS:
-+        case VKD3DSIH_ADD:
-+        case VKD3DSIH_CMP:
-+        case VKD3DSIH_DP2ADD:
-+        case VKD3DSIH_DP3:
-+        case VKD3DSIH_DP4:
-+        case VKD3DSIH_DSX:
-+        case VKD3DSIH_DSY:
-+        case VKD3DSIH_ELSE:
-+        case VKD3DSIH_ENDIF:
-+        case VKD3DSIH_FRC:
-+        case VKD3DSIH_IFC:
-+        case VKD3DSIH_MAD:
-+        case VKD3DSIH_MAX:
-+        case VKD3DSIH_MIN:
-+        case VKD3DSIH_MOV:
-+        case VKD3DSIH_MUL:
-+        case VKD3DSIH_SINCOS:
-+        case VKD3DSIH_SLT:
-+        case VKD3DSIH_TEX:
-+        case VKD3DSIH_TEXKILL:
-+        case VKD3DSIH_TEXLDD:
-+            d3dbc_write_vsir_simple_instruction(d3dbc, ins);
-             break;
--            sm1_instr.opcode = D3DSIO_TEXLDD;
--            sm1_instr.srcs[2].type = VKD3DSPR_TEMP;
--            sm1_instr.srcs[2].reg = ddx->reg.id;
--            sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask);
--            sm1_instr.srcs[3].type = VKD3DSPR_TEMP;
--            sm1_instr.srcs[3].reg = ddy->reg.id;
--            sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask);
--            sm1_instr.src_count += 2;
-+        case VKD3DSIH_EXP:
-+        case VKD3DSIH_LOG:
-+        case VKD3DSIH_RCP:
-+        case VKD3DSIH_RSQ:
-+            writemask = ins->dst->write_mask;
-+            if (writemask != VKD3DSP_WRITEMASK_0 && writemask != VKD3DSP_WRITEMASK_1
-+                    && writemask != VKD3DSP_WRITEMASK_2 && writemask != VKD3DSP_WRITEMASK_3)
-+            {
-+                vkd3d_shader_error(d3dbc->message_context, &ins->location,
-+                        "writemask %#x for vsir instruction with opcode %#x is not single component.",
-+                        writemask, ins->opcode);
-+                d3dbc->failed = true;
-+            }
-+            d3dbc_write_vsir_simple_instruction(d3dbc, ins);
-             break;
-         default:
--            hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type);
--            return;
-+            vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE,
-+                   "vsir instruction with opcode %#x.", ins->opcode);
-+            d3dbc->failed = true;
-+            break;
-     }
--    VKD3D_ASSERT(instr->reg.allocated);
--    d3dbc_write_instruction(d3dbc, &sm1_instr);
- }
--static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-+static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
-+        const struct signature_element *element, bool output)
- {
-     const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
--    const struct hlsl_ir_store *store = hlsl_ir_store(instr);
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs);
--    const struct hlsl_ir_node *rhs = store->rhs.node;
--    struct sm1_instruction sm1_instr =
--    {
--        .opcode = D3DSIO_MOV,
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.reg = reg.id,
--        .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask),
--        .has_dst = 1,
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].reg = rhs->reg.id,
--        .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask),
--        .src_count = 1,
--    };
-+    struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
-+    struct sm1_dst_register reg = {0};
-+    enum vkd3d_decl_usage usage;
-+    uint32_t token, usage_idx;
-+    bool ret;
--    if (store->lhs.var->is_output_semantic)
-+    if (sm1_register_from_semantic_name(version, element->semantic_name,
-+            element->semantic_index, output, &reg.type, &reg.reg))
-     {
--        if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1)
--        {
--            sm1_instr.dst.type = VKD3DSPR_TEMP;
--            sm1_instr.dst.reg = 0;
--        }
--        else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name,
--                store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg))
--        {
--            VKD3D_ASSERT(reg.allocated);
--            sm1_instr.dst.type = VKD3DSPR_OUTPUT;
--            sm1_instr.dst.reg = reg.id;
--        }
--        else
--            sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1;
-+        usage = 0;
-+        usage_idx = 0;
-     }
-     else
--        VKD3D_ASSERT(reg.allocated);
--    sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &sm1_instr);
--static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
--    const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
--    const struct hlsl_ir_node *val = swizzle->val.node;
--    struct sm1_instruction sm1_instr =
-     {
--        .opcode = D3DSIO_MOV,
-+        ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx);
-+        VKD3D_ASSERT(ret);
-+        reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
-+        reg.reg = element->register_index;
-+    }
--        .dst.type = VKD3DSPR_TEMP,
--        .dst.reg = instr->reg.id,
--        .dst.writemask = instr->reg.writemask,
--        .has_dst = 1,
-+    token = VKD3D_SM1_OP_DCL;
-+    if (version->major > 1)
-+        token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
-+    put_u32(buffer, token);
--        .srcs[0].type = VKD3DSPR_TEMP,
--        .srcs[0].reg = val->reg.id,
--        .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask),
--                swizzle->swizzle, instr->data_type->dimx),
--        .src_count = 1,
--    };
-+    token = (1u << 31);
-+    token |= usage << VKD3D_SM1_DCL_USAGE_SHIFT;
-+    token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT;
-+    put_u32(buffer, token);
--    VKD3D_ASSERT(instr->reg.allocated);
--    VKD3D_ASSERT(val->reg.allocated);
--    sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
--    d3dbc_write_instruction(d3dbc, &sm1_instr);
-+    reg.writemask = element->mask;
-+    write_sm1_dst_register(buffer, &reg);
- }
--static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block)
-+static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc)
- {
--    struct hlsl_ctx *ctx = d3dbc->ctx;
--    const struct hlsl_ir_node *instr;
--    LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
--    {
--        if (instr->data_type)
--        {
--            if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
--            {
--                hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
--                break;
--            }
--        }
--        switch (instr->type)
--        {
--            case HLSL_IR_CALL:
--                vkd3d_unreachable();
--            case HLSL_IR_CONSTANT:
--                d3dbc_write_constant(d3dbc, instr);
--                break;
--            case HLSL_IR_EXPR:
--                d3dbc_write_expr(d3dbc, instr);
--                break;
--            case HLSL_IR_IF:
--                if (hlsl_version_ge(ctx, 2, 1))
--                    d3dbc_write_if(d3dbc, instr);
--                else
--                    hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches.");
--                break;
--            case HLSL_IR_JUMP:
--                d3dbc_write_jump(d3dbc, instr);
--                break;
-+    struct vsir_program *program = d3dbc->program;
-+    const struct vkd3d_shader_version *version;
-+    bool write_in = false, write_out = false;
--            case HLSL_IR_LOAD:
--                d3dbc_write_load(d3dbc, instr);
--                break;
-+    version = &program->shader_version;
-+    if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2)
-+        write_in = true;
-+    else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3)
-+        write_in = write_out = true;
-+    else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3)
-+        write_in = true;
--            case HLSL_IR_RESOURCE_LOAD:
--                d3dbc_write_resource_load(d3dbc, instr);
--                break;
-+    if (write_in)
-+    {
-+        for (unsigned int i = 0; i < program->input_signature.element_count; ++i)
-+            d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false);
-+    }
--            case HLSL_IR_STORE:
--                d3dbc_write_store(d3dbc, instr);
--                break;
-+    if (write_out)
-+    {
-+        for (unsigned int i = 0; i < program->output_signature.element_count; ++i)
-+            d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true);
-+    }
--            case HLSL_IR_SWIZZLE:
--                d3dbc_write_swizzle(d3dbc, instr);
--                break;
-+static void d3dbc_write_program_instructions(struct d3dbc_compiler *d3dbc)
-+    struct vsir_program *program = d3dbc->program;
-+    unsigned int i;
--            default:
--                hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
--        }
--    }
-+    for (i = 0; i < program->instructions.count; ++i)
-+        d3dbc_write_vsir_instruction(d3dbc, &program->instructions.elements[i]);
- }
--/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving
-- * data from the other parameters instead, so it can be removed as an argument
-- * and be declared in vkd3d_shader_private.h and used without relying on HLSL
-- * IR structs. */
- int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
-         const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab,
--        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context,
--        struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
- {
-     const struct vkd3d_shader_version *version = &program->shader_version;
-     struct d3dbc_compiler d3dbc = {0};
-     struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer;
-+    int result;
--    d3dbc.ctx = ctx;
-     d3dbc.program = program;
-     d3dbc.message_context = message_context;
-+    switch (version->type)
-+    {
-+        case VKD3D_SHADER_TYPE_VERTEX:
-+            d3dbc.opcode_table = vs_opcode_table;
-+            break;
-+        case VKD3D_SHADER_TYPE_PIXEL:
-+            d3dbc.opcode_table = ps_opcode_table;
-+            break;
-+        default:
-+            vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE,
-+                    "Invalid shader type %u.", version->type);
-+            return VKD3D_ERROR_INVALID_SHADER;
-+    }
-     put_u32(buffer, sm1_version(version->type, version->major, version->minor));
-     bytecode_put_bytes(buffer, ctab->code, ctab->size);
--    d3dbc_write_constant_defs(&d3dbc);
-     d3dbc_write_semantic_dcls(&d3dbc);
--    d3dbc_write_sampler_dcls(&d3dbc);
--    d3dbc_write_block(&d3dbc, &entry_func->body);
-+    d3dbc_write_program_instructions(&d3dbc);
--    put_u32(buffer, D3DSIO_END);
-+    put_u32(buffer, VKD3D_SM1_OP_END);
-+    result = VKD3D_OK;
-     if (buffer->status)
--        ctx->result = buffer->status;
-+        result = buffer->status;
-+    if (d3dbc.failed)
-+        result = VKD3D_ERROR_INVALID_SHADER;
--    if (!ctx->result)
-+    if (!result)
-     {
-         out->code = buffer->data;
-         out->size = buffer->size;
-@@ -2925,5 +2409,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
-     {
-         vkd3d_free(buffer->data);
-     }
--    return ctx->result;
-+    return result;
- }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c
-index 184788dc57e..f6ac8e0829e 100644
---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c
-@@ -20,6 +20,19 @@
- #include "vkd3d_shader_private.h"
-+static void compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4])
-+    const uint8_t *ptr = dxbc;
-+    vkd3d_compute_md5(ptr, size, checksum, VKD3D_MD5_DXBC);
- void dxbc_writer_init(struct dxbc_writer *dxbc)
- {
-     memset(dxbc, 0, sizeof(*dxbc));
-@@ -72,7 +85,7 @@ int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_
-     }
-     set_u32(&buffer, size_position, bytecode_get_size(&buffer));
--    vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum);
-+    compute_dxbc_checksum(buffer.data, buffer.size, checksum);
-     for (i = 0; i < 4; ++i)
-         set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]);
-@@ -188,7 +201,7 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_
-     checksum[3] = read_u32(&ptr);
-     {
--        vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum);
-+        compute_dxbc_checksum(data, data_size, calculated_checksum);
-         if (memcmp(checksum, calculated_checksum, sizeof(checksum)))
-         {
-             WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match "
-@@ -406,8 +419,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s
-         const char *name;
-         uint32_t mask;
--        e[i].sort_index = i;
-         if (has_stream_index)
-             e[i].stream_index = read_u32(&ptr);
-         else
-@@ -1488,7 +1499,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro
-     dxbc->code = context.buffer.data;
-     dxbc->size = total_size;
--    vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum);
-+    compute_dxbc_checksum(dxbc->code, dxbc->size, checksum);
-     for (i = 0; i < 4; ++i)
-         set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]);
-diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
-index 4a17c62292b..d467693bd59 100644
---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
-@@ -430,6 +430,7 @@ enum dx_intrinsic_opcode
-     DX_DERIV_COARSEY                =  84,
-     DX_DERIV_FINEX                  =  85,
-     DX_DERIV_FINEY                  =  86,
-+    DX_SAMPLE_INDEX                 =  90,
-     DX_COVERAGE                     =  91,
-     DX_THREAD_ID                    =  93,
-     DX_GROUP_ID                     =  94,
-@@ -3827,6 +3828,11 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind(
- {
-     switch (sysval_semantic)
-     {
-+        /* VSIR does not use an I/O register for SV_SampleIndex, but its
-+         * signature element has a register index of UINT_MAX and it is
-+         * convenient to return a valid register type here to handle it. */
-+            return VKD3DSPR_NULL;
-         case VKD3D_SHADER_SV_COVERAGE:
-             return VKD3DSPR_COVERAGE;
-         case VKD3D_SHADER_SV_DEPTH:
-@@ -3844,6 +3850,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade
-         bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params)
- {
-     enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type;
-+    enum vkd3d_shader_register_type io_reg_type;
-     bool is_patch_constant, is_control_point;
-     struct vkd3d_shader_dst_param *param;
-     const struct signature_element *e;
-@@ -3876,9 +3883,10 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade
-         param = &params[i];
--        if (e->register_index == UINT_MAX)
-+        if (e->register_index == UINT_MAX
-+                && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL)
-         {
--            dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic));
-+            dst_param_io_init(param, e, io_reg_type);
-             continue;
-         }
-@@ -3888,7 +3896,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade
-         if (is_control_point)
-         {
-             if (reg_type == VKD3DSPR_OUTPUT)
--                param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions);
-+                param->reg.idx[count].rel_addr = vsir_program_create_outpointid_param(sm6->p.program);
-             param->reg.idx[count++].offset = 0;
-         }
-@@ -4161,8 +4169,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_
-     dst_param_init(&dst_params[0]);
-     dst_params[1].reg = ptr->u.reg;
--    /* The groupshared register has data type UAV when accessed. */
--    dst_params[1].reg.data_type = VKD3D_DATA_UAV;
-+    dst_params[1].reg.data_type = VKD3D_DATA_UNUSED;
-     dst_params[1].reg.idx[1].rel_addr = NULL;
-     dst_params[1].reg.idx[1].offset = ~0u;
-     dst_params[1].reg.idx_count = 1;
-@@ -4175,6 +4182,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty
-         const struct sm6_type *type_b, struct sm6_parser *sm6)
- {
-     bool is_int = sm6_type_is_bool_i16_i32_i64(type_a);
-+    bool is_double = sm6_type_is_double(type_a);
-     bool is_bool = sm6_type_is_bool(type_a);
-     enum vkd3d_shader_opcode op;
-     bool is_valid;
-@@ -4199,7 +4207,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty
-         case BINOP_ADD:
-         case BINOP_SUB:
-             /* NEG is applied later for subtraction. */
--            op = is_int ? VKD3DSIH_IADD : VKD3DSIH_ADD;
-+            op = is_int ? VKD3DSIH_IADD : (is_double ? VKD3DSIH_DADD : VKD3DSIH_ADD);
-             is_valid = !is_bool;
-             break;
-         case BINOP_AND:
-@@ -4215,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty
-             is_valid = is_int && !is_bool;
-             break;
-         case BINOP_MUL:
--            op = is_int ? VKD3DSIH_UMUL : VKD3DSIH_MUL;
-+            op = is_int ? VKD3DSIH_UMUL : (is_double ? VKD3DSIH_DMUL : VKD3DSIH_MUL);
-             is_valid = !is_bool;
-             break;
-         case BINOP_OR:
-@@ -4223,7 +4231,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty
-             is_valid = is_int;
-             break;
-         case BINOP_SDIV:
--            op = is_int ? VKD3DSIH_IDIV : VKD3DSIH_DIV;
-+            op = is_int ? VKD3DSIH_IDIV : (is_double ? VKD3DSIH_DDIV : VKD3DSIH_DIV);
-             is_valid = !is_bool;
-             break;
-         case BINOP_SREM:
-@@ -4865,8 +4873,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr
-     if (!(src_param = instruction_src_params_alloc(ins, 1, sm6)))
-         return;
-     src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg);
-+    /* Differently from other descriptors, constant buffers require an
-+     * additional index, used to index within the constant buffer itself. */
-+    src_param->reg.idx_count = 3;
-     register_index_address_init(&src_param->reg.idx[2], operands[1], sm6);
--    VKD3D_ASSERT(src_param->reg.idx_count == 3);
-     type = sm6_type_get_scalar_type(dst->type, 0);
-     VKD3D_ASSERT(type);
-@@ -4965,8 +4975,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int
-     dst->u.handle.d = d;
-     reg = &dst->u.handle.reg;
--    /* Set idx_count to 3 for use with load/store instructions. */
--    vsir_register_init(reg, d->reg_type, d->reg_data_type, 3);
-+    vsir_register_init(reg, d->reg_type, d->reg_data_type, 2);
-     reg->dimension = VSIR_DIMENSION_VEC4;
-     reg->idx[0].offset = id;
-     register_index_address_init(&reg->idx[1], operands[2], sm6);
-@@ -5794,6 +5803,34 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_
-     instruction_dst_param_init_ssa_vector(ins, component_count, sm6);
- }
-+static void sm6_parser_emit_dx_sample_index(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
-+        const struct sm6_value **operands, struct function_emission_state *state)
-+    const struct shader_signature *signature = &sm6->p.program->input_signature;
-+    struct vkd3d_shader_instruction *ins = state->ins;
-+    struct vkd3d_shader_src_param *src_param;
-+    unsigned int element_idx;
-+    vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV);
-+    /* SV_SampleIndex is identified in VSIR by its signature element index,
-+     * but the index is not supplied as a parameter to the DXIL intrinsic. */
-+    if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_SAMPLE_INDEX, 0, &element_idx))
-+    {
-+        WARN("Sample index is not in the signature.\n");
-+        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-+                "Sample index signature element for a sample index operation is missing.");
-+        return;
-+    }
-+    if (!(src_param = instruction_src_params_alloc(ins, 1, sm6)))
-+        return;
-+    src_param->reg = sm6->input_params[element_idx].reg;
-+    src_param_init(src_param);
-+    instruction_dst_param_init_ssa_scalar(ins, sm6);
- static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
-         const struct sm6_value **operands, struct function_emission_state *state)
- {
-@@ -5871,6 +5908,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr
-         return;
-     }
-     e = &signature->elements[row_index];
-+    if (!e->sysval_semantic)
-+        column_index += vsir_write_mask_get_component_idx(e->mask);
-     if (column_index >= VKD3D_VEC4_SIZE)
-     {
-@@ -6297,6 +6336,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] =
-     [DX_SAMPLE_C                      ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample},
-     [DX_SAMPLE_C_LZ                   ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample},
-     [DX_SAMPLE_GRAD                   ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample},
-+    [DX_SAMPLE_INDEX                  ] = {"i", "",     sm6_parser_emit_dx_sample_index},
-     [DX_SAMPLE_LOD                    ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample},
-     [DX_SATURATE                      ] = {"g", "R",    sm6_parser_emit_dx_saturate},
-     [DX_SIN                           ] = {"g", "R",    sm6_parser_emit_dx_sincos},
-@@ -6861,7 +6901,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re
-     struct vkd3d_shader_dst_param *dst_params;
-     struct vkd3d_shader_src_param *src_params;
-     const struct sm6_value *ptr, *cmp, *new;
--    const struct sm6_type *type;
-     unsigned int i = 0;
-     bool is_volatile;
-     uint64_t code;
-@@ -6887,9 +6926,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re
-         return;
-     }
--    type = ptr->type->u.pointer.type;
--    cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i);
--    new = sm6_parser_get_value_by_ref(sm6, record, type, &i);
-+    /* Forward-referenced comparands are stored as value/type pairs, even
-+     * though in principle we could use the destination type. */
-+    cmp = sm6_parser_get_value_by_ref(sm6, record, NULL, &i);
-+    new = sm6_parser_get_value_by_ref(sm6, record, ptr->type->u.pointer.type, &i);
-     if (!cmp || !new)
-         return;
-@@ -7287,7 +7327,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco
-     unsigned int i = 0, alignment, operand_count;
-     struct vkd3d_shader_src_param *src_params;
-     struct vkd3d_shader_dst_param *dst_param;
--    const struct sm6_type *pointee_type;
-     const struct sm6_value *ptr, *src;
-     uint64_t alignment_code;
-@@ -7299,13 +7338,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco
-         return;
-     }
--    pointee_type = ptr->type->u.pointer.type;
--    if (!(src = sm6_parser_get_value_by_ref(sm6, record, pointee_type, &i)))
-+    /* Forward-referenced sources are stored as value/type pairs, even
-+     * though in principle we could use the destination type. */
-+    if (!(src = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)))
-         return;
-     if (!sm6_value_validate_is_numeric(src, sm6))
-         return;
--    if (pointee_type != src->type)
-+    if (ptr->type->u.pointer.type != src->type)
-     {
-         WARN("Type mismatch.\n");
-         vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH,
-@@ -8510,6 +8550,7 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] =
-@@ -8908,7 +8949,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6,
-     d->resource_type = ins->resource_type;
-     d->kind = kind;
-     d->reg_type = VKD3DSPR_RESOURCE;
--    d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE;
-+    d->reg_data_type = VKD3D_DATA_UNUSED;
-     d->resource_data_type = (ins->opcode == VKD3DSIH_DCL)
-             ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED;
-@@ -8982,7 +9023,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6,
-     d->resource_type = ins->resource_type;
-     d->kind = values[0];
-     d->reg_type = VKD3DSPR_UAV;
--    d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV;
-+    d->reg_data_type = VKD3D_DATA_UNUSED;
-     d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED)
-             ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED;
-@@ -9346,7 +9387,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Signature element is not a node.\n");
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "Signature element is not a metadata node.");
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         element_node = m->u.node;
-@@ -9355,7 +9396,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Invalid operand count %u.\n", element_node->operand_count);
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "Invalid signature element operand count %u.", element_node->operand_count);
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         if (element_node->operand_count > 11)
-         {
-@@ -9374,7 +9415,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-                 WARN("Failed to load uint value at index %u.\n", j);
-                 vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                         "Signature element value at index %u is not an integer.", j);
--                return VKD3D_ERROR_INVALID_SHADER;
-+                goto invalid;
-             }
-         }
-@@ -9385,7 +9426,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             FIXME("Unsupported element id %u not equal to its index %u.\n", values[0], i);
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "A non-sequential and non-zero-based element id is not supported.");
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         if (!sm6_metadata_value_is_string(element_node->operands[1]))
-@@ -9393,7 +9434,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Element name is not a string.\n");
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "Signature element name is not a metadata string.");
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         e->semantic_name = element_node->operands[1]->u.string_value;
-@@ -9407,7 +9448,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Unhandled semantic kind %u.\n", j);
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "DXIL semantic kind %u is unhandled.", j);
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         if ((e->interpolation_mode = values[5]) >= VKD3DSIM_COUNT)
-@@ -9415,7 +9456,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Unhandled interpolation mode %u.\n", e->interpolation_mode);
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "Interpolation mode %u is unhandled.", e->interpolation_mode);
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         e->register_count = values[6];
-@@ -9430,7 +9471,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-                 WARN("Unhandled I/O register semantic kind %u.\n", j);
-                 vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                         "DXIL semantic kind %u is unhandled for an I/O register.", j);
--                return VKD3D_ERROR_INVALID_SHADER;
-+                goto invalid;
-             }
-         }
-         else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index)
-@@ -9439,7 +9480,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "A signature element starting row of %u with count %u is invalid.",
-                     e->register_index, e->register_count);
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         index = values[9];
-@@ -9448,7 +9489,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Invalid column start %u with count %u.\n", index, column_count);
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "A signature element starting column %u with count %u is invalid.", index, column_count);
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         e->mask = vkd3d_write_mask_from_component_count(column_count);
-@@ -9471,7 +9512,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-             WARN("Semantic index list is not a node.\n");
-             vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE,
-                     "Signature element semantic index list is not a metadata node.");
--            return VKD3D_ERROR_INVALID_SHADER;
-+            goto invalid;
-         }
-         element_node = m->u.node;
-@@ -9516,6 +9557,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const
-     s->element_count = operand_count;
-     return VKD3D_OK;
-+    vkd3d_free(elements);
- }
- static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m,
-@@ -9557,7 +9602,7 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons
- static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m)
- {
--    enum vkd3d_shader_global_flags global_flags, mask, rotated_flags;
-+    enum vsir_global_flags global_flags, mask, rotated_flags;
-     struct vkd3d_shader_instruction *ins;
-     if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags))
-@@ -9567,7 +9612,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm
-                 "Global flags metadata value is not an integer.");
-         return;
-     }
--    /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. */
-+    /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vsir_global_flags. */
-     mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1;
-     rotated_flags = global_flags & mask;
-     rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4);
-@@ -9575,6 +9620,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm
-     ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS);
-     ins->declaration.global_flags = global_flags;
-+    sm6->p.program->global_flags = global_flags;
- }
- static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m)
-@@ -9633,6 +9679,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co
-     ins->declaration.thread_group_size.x = group_sizes[0];
-     ins->declaration.thread_group_size.y = group_sizes[1];
-     ins->declaration.thread_group_size.z = group_sizes[2];
-+    sm6->p.program->thread_group_size = ins->declaration.thread_group_size;
-     return VKD3D_OK;
- }
-@@ -9670,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6,
-     ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN);
-     ins->declaration.tessellator_domain = tessellator_domain;
-+    sm6->p.program->tess_domain = tessellator_domain;
- }
--static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count,
--        const char *type)
-+static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6,
-+        unsigned int count, bool allow_zero, const char *type)
- {
--    if (!count || count > 32)
-+    if ((!count && !allow_zero) || count > 32)
-     {
-         WARN("%s control point count %u invalid.\n", type, count);
-         vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES,
-@@ -9904,7 +9952,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa
-     }
-     sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]);
--    sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input");
-+    sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input");
-     sm6->p.program->input_control_point_count = operands[1];
-     return operands[0];
-@@ -9963,9 +10011,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa
-         }
-     }
--    sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input");
-+    sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input");
-     program->input_control_point_count = operands[1];
--    sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output");
-+    sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output");
-     sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]);
-     program->output_control_point_count = operands[2];
-     sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]);
-@@ -10303,12 +10351,28 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
-     /* Estimate instruction count to avoid reallocation in most shaders. */
-     count = max(token_count, 400) - 400;
--    if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10))
-+    if (!vsir_program_init(program, compile_info, &version,
-+            (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO))
-         return VKD3D_ERROR_OUT_OF_MEMORY;
-     vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name);
-     sm6->ptr = &sm6->start[1];
-     sm6->bitpos = 2;
-+    switch (program->shader_version.type)
-+    {
-+        case VKD3D_SHADER_TYPE_HULL:
-+        case VKD3D_SHADER_TYPE_DOMAIN:
-+            break;
-+        default:
-+            if (program->patch_constant_signature.element_count != 0)
-+            {
-+                WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n");
-+                shader_signature_cleanup(&program->patch_constant_signature);
-+            }
-+            break;
-+    }
-     input_signature = &program->input_signature;
-     output_signature = &program->output_signature;
-     patch_constant_signature = &program->patch_constant_signature;
-@@ -10526,9 +10590,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
-     dxil_block_destroy(&sm6->root_block);
-+    if (sm6->p.failed)
-+    {
-+        goto fail;
-+    }
-     return VKD3D_OK;
- fail:
-+    sm6_parser_cleanup(sm6);
-     vsir_program_cleanup(program);
-     return ret;
- }
-@@ -10570,18 +10641,10 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co
-     free_dxbc_shader_desc(&dxbc_desc);
-     vkd3d_free(byte_code);
--    if (!sm6.p.failed && ret >= 0)
--        ret = vkd3d_shader_parser_validate(&sm6.p, config_flags);
--    if (sm6.p.failed && ret >= 0)
--    sm6_parser_cleanup(&sm6);
-     if (ret < 0)
--    {
--        WARN("Failed to parse shader.\n");
-         return ret;
--    }
--    return ret;
-+    sm6_parser_cleanup(&sm6);
-+    return VKD3D_OK;
- }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
-index a1d1fd6572f..cb42551ee8b 100644
---- a/libs/vkd3d/libs/vkd3d-shader/fx.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
-@@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin
-     return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value));
- }
-+struct fx_4_binary_type
-+    uint32_t name;
-+    uint32_t class;
-+    uint32_t element_count;
-+    uint32_t unpacked_size;
-+    uint32_t stride;
-+    uint32_t packed_size;
-+    uint32_t typeinfo;
- struct string_entry
- {
-     struct rb_entry entry;
-@@ -38,6 +49,7 @@ struct type_entry
-     struct list entry;
-     const char *name;
-     uint32_t elements_count;
-+    uint32_t modifiers;
-     uint32_t offset;
- };
-@@ -181,6 +193,7 @@ struct fx_write_context
-     struct vkd3d_bytecode_buffer unstructured;
-     struct vkd3d_bytecode_buffer structured;
-+    struct vkd3d_bytecode_buffer objects;
-     struct rb_tree strings;
-     struct list types;
-@@ -223,11 +236,6 @@ static void set_status(struct fx_write_context *fx, int status)
-         fx->status = status;
- }
--static bool has_annotations(const struct hlsl_ir_var *var)
--    return var->annotations && !list_empty(&var->annotations->vars);
- static uint32_t write_string(const char *string, struct fx_write_context *fx)
- {
-     return fx->ops->write_string(string, fx);
-@@ -278,9 +286,9 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i
- static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx)
- {
-+    unsigned int elements_count, modifiers;
-     const struct hlsl_type *element_type;
-     struct type_entry *type_entry;
--    unsigned int elements_count;
-     const char *name;
-     VKD3D_ASSERT(fx->ctx->profile->major_version >= 4);
-@@ -297,6 +305,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context
-     }
-     name = get_fx_4_type_name(element_type);
-+    modifiers = element_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK;
-     LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry)
-     {
-@@ -306,6 +315,9 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context
-         if (type_entry->elements_count != elements_count)
-             continue;
-+        if (type_entry->modifiers != modifiers)
-+            continue;
-         return type_entry->offset;
-     }
-@@ -315,6 +327,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context
-     type_entry->offset = write_fx_4_type(type, fx);
-     type_entry->name = name;
-     type_entry->elements_count = elements_count;
-+    type_entry->modifiers = modifiers;
-     list_add_tail(&fx->types, &type_entry->entry);
-@@ -429,17 +442,26 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx
-     write_fx_4_state_block(var, 0, count_offset, fx);
- }
-+static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offset, struct fx_write_context *fx)
-+    struct vkd3d_bytecode_buffer *buffer = &fx->structured;
-+    uint32_t count;
-+    count = write_annotations(var->annotations, fx);
-+    set_u32(buffer, count_offset, count);
- static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx)
- {
-     struct vkd3d_bytecode_buffer *buffer = &fx->structured;
--    uint32_t name_offset;
-+    uint32_t name_offset, annotation_count_offset;
-     name_offset = write_string(var->name, fx);
-     put_u32(buffer, name_offset);
--    put_u32(buffer, 0); /* Annotation count. */
-+    annotation_count_offset = put_u32(buffer, 0);
-     put_u32(buffer, 0); /* Assignment count. */
--    /* TODO: annotations */
-+    write_fx_2_annotations(var, annotation_count_offset, fx);
-     /* TODO: assignments */
-     if (var->state_block_count && var->state_blocks[0]->count)
-@@ -459,25 +481,93 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type)
-     return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count;
- }
--static const uint32_t fx_4_numeric_base_type[] =
-+enum fx_4_type_constants
-+    /* Numeric types encoding */
-+    FX_4_NUMERIC_TYPE_INT   = 2,
-+    FX_4_NUMERIC_TYPE_UINT  = 3,
-+    FX_4_NUMERIC_TYPE_BOOL  = 4,
-+    /* Object types */
-+    FX_4_OBJECT_TYPE_STRING = 0x1,
-+    FX_4_OBJECT_TYPE_TEXTURE_1D = 0xa,
-+    FX_4_OBJECT_TYPE_TEXTURE_2D = 0xc,
-+    FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10,
-+    FX_4_OBJECT_TYPE_RTV = 0x13,
-+    FX_4_OBJECT_TYPE_DSV = 0x14,
-+    FX_5_OBJECT_TYPE_UAV_1D = 0x1f,
-+    FX_5_OBJECT_TYPE_UAV_1DARRAY = 0x20,
-+    FX_5_OBJECT_TYPE_UAV_2D = 0x21,
-+    FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22,
-+    FX_5_OBJECT_TYPE_UAV_3D = 0x23,
-+    /* Types */
-+    /* Assignment types */
-+static const uint32_t fx_4_numeric_base_types[] =
- {
--    [HLSL_TYPE_FLOAT] = 1,
--    [HLSL_TYPE_INT  ] = 2,
--    [HLSL_TYPE_UINT ] = 3,
--    [HLSL_TYPE_BOOL ] = 4,
- };
- static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx)
- {
--    static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3;
--    static const unsigned int NUMERIC_ROWS_SHIFT = 8;
--    static const unsigned int NUMERIC_COLUMNS_SHIFT = 11;
--    static const unsigned int NUMERIC_COLUMN_MAJOR_MASK = 0x4000;
-     static const uint32_t numeric_type_class[] =
-     {
--        [HLSL_CLASS_SCALAR] = 1,
--        [HLSL_CLASS_VECTOR] = 2,
--        [HLSL_CLASS_MATRIX] = 3,
-     };
-     struct hlsl_ctx *ctx = fx->ctx;
-     uint32_t value = 0;
-@@ -497,20 +587,21 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type,
-     switch (type->e.numeric.type)
-     {
-         case HLSL_TYPE_FLOAT:
-+        case HLSL_TYPE_HALF:
-         case HLSL_TYPE_INT:
-         case HLSL_TYPE_UINT:
-         case HLSL_TYPE_BOOL:
--            value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT);
-+            value |= (fx_4_numeric_base_types[type->e.numeric.type] << FX_4_NUMERIC_BASE_TYPE_SHIFT);
-             break;
-         default:
-             hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type);
-             return 0;
-     }
--    value |= (type->dimy & 0x7) << NUMERIC_ROWS_SHIFT;
--    value |= (type->dimx & 0x7) << NUMERIC_COLUMNS_SHIFT;
-+    value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT;
-+    value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT;
-     if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
--        value |= NUMERIC_COLUMN_MAJOR_MASK;
-+        value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK;
-     return value;
- }
-@@ -539,6 +630,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type)
-         [HLSL_SAMPLER_DIM_3D]                = "RWTexture3D",
-         [HLSL_SAMPLER_DIM_BUFFER]            = "RWBuffer",
-         [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer",
-+        [HLSL_SAMPLER_DIM_RAW_BUFFER]        = "RWByteAddressBuffer",
-     };
-     switch (type->class)
-@@ -564,17 +656,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type)
-             return "VertexShader";
-+            return "GeometryShader";
-             return "PixelShader";
-         case HLSL_CLASS_STRING:
-             return "String";
-+        case HLSL_CLASS_SCALAR:
-+        case HLSL_CLASS_VECTOR:
-+        case HLSL_CLASS_MATRIX:
-+            if (type->e.numeric.type == HLSL_TYPE_HALF)
-+                return "float";
-+            /* fall-through */
-         default:
-             return type->name;
-     }
- }
-+static bool is_numeric_fx_4_type(const struct hlsl_type *type)
-+    type = hlsl_get_multiarray_element_type(type);
-+    return type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type);
- static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx)
- {
-     struct field_offsets
-@@ -584,48 +691,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
-         uint32_t offset;
-         uint32_t type;
-     };
--    uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc;
-+    uint32_t name_offset, offset, unpacked_size, packed_size, stride, numeric_desc;
-     struct vkd3d_bytecode_buffer *buffer = &fx->unstructured;
-     struct field_offsets *field_offsets = NULL;
-+    const struct hlsl_type *element_type;
-     struct hlsl_ctx *ctx = fx->ctx;
-     uint32_t elements_count = 0;
-     const char *name;
-     size_t i;
--    /* Resolve arrays to element type and number of elements. */
-     if (type->class == HLSL_CLASS_ARRAY)
--    {
-         elements_count = hlsl_get_multiarray_size(type);
--        type = hlsl_get_multiarray_element_type(type);
--    }
-+    element_type = hlsl_get_multiarray_element_type(type);
--    name = get_fx_4_type_name(type);
-+    name = get_fx_4_type_name(element_type);
-     name_offset = write_string(name, fx);
--    if (type->class == HLSL_CLASS_STRUCT)
-+    if (element_type->class == HLSL_CLASS_STRUCT)
-     {
--        if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets))))
-+        if (!(field_offsets = hlsl_calloc(ctx, element_type->e.record.field_count, sizeof(*field_offsets))))
-             return 0;
--        for (i = 0; i < type->e.record.field_count; ++i)
-+        for (i = 0; i < element_type->e.record.field_count; ++i)
-         {
--            const struct hlsl_struct_field *field = &type->e.record.fields[i];
-+            const struct hlsl_struct_field *field = &element_type->e.record.fields[i];
-             field_offsets[i].name = write_string(field->name, fx);
-             field_offsets[i].semantic = write_string(field->semantic.raw_name, fx);
--            field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC];
-+            field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float);
-             field_offsets[i].type = write_type(field->type, fx);
-         }
-     }
-     offset = put_u32_unaligned(buffer, name_offset);
--    switch (type->class)
-+    switch (element_type->class)
-     {
-         case HLSL_CLASS_SCALAR:
-         case HLSL_CLASS_VECTOR:
-         case HLSL_CLASS_MATRIX:
--            put_u32_unaligned(buffer, 1);
-+            put_u32_unaligned(buffer, FX_4_TYPE_CLASS_NUMERIC);
-             break;
-@@ -643,48 +748,50 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
-         case HLSL_CLASS_BLEND_STATE:
-         case HLSL_CLASS_STRING:
--            put_u32_unaligned(buffer, 2);
-+            put_u32_unaligned(buffer, FX_4_TYPE_CLASS_OBJECT);
-             break;
-         case HLSL_CLASS_STRUCT:
--            put_u32_unaligned(buffer, 3);
-+            put_u32_unaligned(buffer, FX_4_TYPE_CLASS_STRUCT);
-             break;
-         case HLSL_CLASS_ARRAY:
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_PASS:
-         case HLSL_CLASS_TECHNIQUE:
-         case HLSL_CLASS_NULL:
-             vkd3d_unreachable();
-         case HLSL_CLASS_VOID:
--            FIXME("Writing type class %u is not implemented.\n", type->class);
-+            FIXME("Writing type class %u is not implemented.\n", element_type->class);
-             set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED);
-             return 0;
-     }
-     /* Structures can only contain numeric fields, this is validated during variable declaration. */
--    total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float);
-+    unpacked_size = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float);
-     packed_size = 0;
--    if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type))
--        packed_size = hlsl_type_component_count(type) * sizeof(float);
-+    if (is_numeric_fx_4_type(element_type))
-+        packed_size = hlsl_type_component_count(element_type) * sizeof(float);
-     if (elements_count)
--    {
--        total_size *= elements_count;
-         packed_size *= elements_count;
--    }
-+    stride = element_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float);
-     stride = align(stride, 4 * sizeof(float));
-     put_u32_unaligned(buffer, elements_count);
--    put_u32_unaligned(buffer, total_size);
-+    put_u32_unaligned(buffer, unpacked_size);
-     put_u32_unaligned(buffer, stride);
-     put_u32_unaligned(buffer, packed_size);
--    if (type->class == HLSL_CLASS_STRUCT)
-+    if (element_type->class == HLSL_CLASS_STRUCT)
-     {
--        put_u32_unaligned(buffer, type->e.record.field_count);
--        for (i = 0; i < type->e.record.field_count; ++i)
-+        put_u32_unaligned(buffer, element_type->e.record.field_count);
-+        for (i = 0; i < element_type->e.record.field_count; ++i)
-         {
-             const struct field_offsets *field = &field_offsets[i];
-@@ -700,95 +807,96 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
-             put_u32_unaligned(buffer, 0); /* Interface count */
-         }
-     }
--    else if (type->class == HLSL_CLASS_TEXTURE)
-+    else if (element_type->class == HLSL_CLASS_TEXTURE)
-     {
-         static const uint32_t texture_type[] =
-         {
--            [HLSL_SAMPLER_DIM_GENERIC]   = 9,
--            [HLSL_SAMPLER_DIM_1D]        = 10,
--            [HLSL_SAMPLER_DIM_1DARRAY]   = 11,
--            [HLSL_SAMPLER_DIM_2D]        = 12,
--            [HLSL_SAMPLER_DIM_2DARRAY]   = 13,
--            [HLSL_SAMPLER_DIM_2DMS]      = 14,
--            [HLSL_SAMPLER_DIM_2DMSARRAY] = 15,
--            [HLSL_SAMPLER_DIM_3D]        = 16,
--            [HLSL_SAMPLER_DIM_CUBE]      = 17,
--            [HLSL_SAMPLER_DIM_CUBEARRAY] = 23,
-+            [HLSL_SAMPLER_DIM_1D]        = FX_4_OBJECT_TYPE_TEXTURE_1D,
-+            [HLSL_SAMPLER_DIM_2D]        = FX_4_OBJECT_TYPE_TEXTURE_2D,
-+            [HLSL_SAMPLER_DIM_3D]        = FX_4_OBJECT_TYPE_TEXTURE_3D,
-         };
--        put_u32_unaligned(buffer, texture_type[type->sampler_dim]);
-+        put_u32_unaligned(buffer, texture_type[element_type->sampler_dim]);
-     }
--    else if (type->class == HLSL_CLASS_SAMPLER)
-+    else if (element_type->class == HLSL_CLASS_SAMPLER)
-     {
--        put_u32_unaligned(buffer, 21);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_SAMPLER_STATE);
-     }
--    else if (type->class == HLSL_CLASS_UAV)
-+    else if (element_type->class == HLSL_CLASS_UAV)
-     {
-         static const uint32_t uav_type[] =
-         {
--            [HLSL_SAMPLER_DIM_1D]                = 31,
--            [HLSL_SAMPLER_DIM_1DARRAY]           = 32,
--            [HLSL_SAMPLER_DIM_2D]                = 33,
--            [HLSL_SAMPLER_DIM_2DARRAY]           = 34,
--            [HLSL_SAMPLER_DIM_3D]                = 35,
--            [HLSL_SAMPLER_DIM_BUFFER]            = 36,
-+            [HLSL_SAMPLER_DIM_1D]                = FX_5_OBJECT_TYPE_UAV_1D,
-+            [HLSL_SAMPLER_DIM_1DARRAY]           = FX_5_OBJECT_TYPE_UAV_1DARRAY,
-+            [HLSL_SAMPLER_DIM_2D]                = FX_5_OBJECT_TYPE_UAV_2D,
-+            [HLSL_SAMPLER_DIM_2DARRAY]           = FX_5_OBJECT_TYPE_UAV_2DARRAY,
-+            [HLSL_SAMPLER_DIM_3D]                = FX_5_OBJECT_TYPE_UAV_3D,
-         };
--        put_u32_unaligned(buffer, uav_type[type->sampler_dim]);
-+        put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]);
-     }
--    else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW)
-+    else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW)
-     {
--        put_u32_unaligned(buffer, 20);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV);
-     }
--    else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW)
-+    else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW)
-     {
--        put_u32_unaligned(buffer, 19);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV);
-     }
--    else if (type->class == HLSL_CLASS_PIXEL_SHADER)
-+    else if (element_type->class == HLSL_CLASS_PIXEL_SHADER)
-     {
--        put_u32_unaligned(buffer, 5);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_PIXEL_SHADER);
-     }
--    else if (type->class == HLSL_CLASS_VERTEX_SHADER)
-+    else if (element_type->class == HLSL_CLASS_VERTEX_SHADER)
-     {
--        put_u32_unaligned(buffer, 6);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_VERTEX_SHADER);
-     }
--    else if (type->class == HLSL_CLASS_RASTERIZER_STATE)
-+    else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE)
-     {
--        put_u32_unaligned(buffer, 4);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RASTERIZER_STATE);
-     }
--    else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE)
-+    else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE)
-     {
--        put_u32_unaligned(buffer, 3);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE);
-     }
--    else if (type->class == HLSL_CLASS_BLEND_STATE)
-+    else if (element_type->class == HLSL_CLASS_BLEND_STATE)
-     {
--        put_u32_unaligned(buffer, 2);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_BLEND_STATE);
-     }
--    else if (type->class == HLSL_CLASS_STRING)
-+    else if (element_type->class == HLSL_CLASS_STRING)
-     {
--        put_u32_unaligned(buffer, 1);
-+        put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_STRING);
-     }
--    else if (hlsl_is_numeric_type(type))
-+    else if (hlsl_is_numeric_type(element_type))
-     {
--        numeric_desc = get_fx_4_numeric_type_description(type, fx);
-+        numeric_desc = get_fx_4_numeric_type_description(element_type, fx);
-         put_u32_unaligned(buffer, numeric_desc);
-     }
--    else if (type->class == HLSL_CLASS_COMPUTE_SHADER)
-+    else if (element_type->class == HLSL_CLASS_COMPUTE_SHADER)
-     {
--        put_u32_unaligned(buffer, 28);
-+        put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_COMPUTE_SHADER);
-     }
--    else if (type->class == HLSL_CLASS_HULL_SHADER)
-+    else if (element_type->class == HLSL_CLASS_HULL_SHADER)
-     {
--        put_u32_unaligned(buffer, 29);
-+        put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_HULL_SHADER);
-     }
--    else if (type->class == HLSL_CLASS_DOMAIN_SHADER)
-+    else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER)
-     {
--        put_u32_unaligned(buffer, 30);
-+        put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_DOMAIN_SHADER);
-     }
-     else
-     {
--        FIXME("Type %u is not supported.\n", type->class);
-+        FIXME("Type %u is not supported.\n", element_type->class);
-         set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED);
-     }
-@@ -963,16 +1071,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
- static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_context *fx)
- {
-+    uint32_t name_offset, pass_count_offset, annotation_count_offset, count = 0;
-     struct vkd3d_bytecode_buffer *buffer = &fx->structured;
--    uint32_t name_offset, count_offset, count = 0;
-     struct hlsl_ir_var *pass;
-     name_offset = write_string(var->name, fx);
-     put_u32(buffer, name_offset);
--    put_u32(buffer, 0); /* Annotation count. */
--    count_offset = put_u32(buffer, 0); /* Pass count. */
-+    annotation_count_offset = put_u32(buffer, 0);
-+    pass_count_offset = put_u32(buffer, 0);
--    /* FIXME: annotations */
-+    write_fx_2_annotations(var, annotation_count_offset, fx);
-     LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry)
-     {
-@@ -980,47 +1088,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex
-         ++count;
-     }
--    set_u32(buffer, count_offset, count);
-+    set_u32(buffer, pass_count_offset, count);
- }
--static uint32_t get_fx_2_type_size(const struct hlsl_type *type)
-+static uint32_t write_fx_2_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value,
-+            struct fx_write_context *fx)
- {
--    uint32_t size = 0, elements_count;
--    size_t i;
-+    const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type);
-+    uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j;
-+    struct vkd3d_bytecode_buffer *buffer = &fx->unstructured;
-+    struct hlsl_ctx *ctx = fx->ctx;
-+    uint32_t offset = buffer->size;
-+    unsigned int comp_count;
--    if (type->class == HLSL_CLASS_ARRAY)
-+    if (!value)
-+        return 0;
-+    comp_count = hlsl_type_component_count(type);
-+    for (i = 0; i < elements_count; ++i)
-     {
--        elements_count = hlsl_get_multiarray_size(type);
--        type = hlsl_get_multiarray_element_type(type);
--        return get_fx_2_type_size(type) * elements_count;
-+        switch (type->class)
-+        {
-+            case HLSL_CLASS_SCALAR:
-+            case HLSL_CLASS_VECTOR:
-+            case HLSL_CLASS_MATRIX:
-+            {
-+                switch (type->e.numeric.type)
-+                {
-+                    case HLSL_TYPE_FLOAT:
-+                    case HLSL_TYPE_HALF:
-+                    case HLSL_TYPE_INT:
-+                    case HLSL_TYPE_UINT:
-+                    case HLSL_TYPE_BOOL:
-+                        for (j = 0; j < comp_count; ++j)
-+                        {
-+                            put_u32(buffer, value->number.u);
-+                            value++;
-+                        }
-+                        break;
-+                    default:
-+                        hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.",
-+                                type->e.numeric.type);
-+                }
-+                break;
-+            }
-+            case HLSL_CLASS_STRUCT:
-+            {
-+                struct hlsl_struct_field *fields = type->e.record.fields;
-+                for (j = 0; j < type->e.record.field_count; ++j)
-+                {
-+                    write_fx_2_default_value(fields[i].type, value, fx);
-+                    value += hlsl_type_component_count(fields[i].type);
-+                }
-+                break;
-+            }
-+            default:
-+                hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class);
-+        }
-     }
--    else if (type->class == HLSL_CLASS_STRUCT)
-+    return offset;
-+static uint32_t write_fx_2_object_initializer(const struct hlsl_ir_var *var, struct fx_write_context *fx)
-+    const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type);
-+    unsigned int i, elements_count = hlsl_get_multiarray_size(var->data_type);
-+    struct vkd3d_bytecode_buffer *buffer = &fx->objects;
-+    uint32_t offset = fx->unstructured.size, id, size;
-+    struct hlsl_ctx *ctx = fx->ctx;
-+    const void *data;
-+    for (i = 0; i < elements_count; ++i)
-     {
--        for (i = 0; i < type->e.record.field_count; ++i)
-+        if (type->class == HLSL_CLASS_SAMPLER)
-         {
--            const struct hlsl_struct_field *field = &type->e.record.fields[i];
--            size += get_fx_2_type_size(field->type);
-+            hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 sampler objects initializers is not implemented.");
-         }
-+        else
-+        {
-+            switch (type->class)
-+            {
-+                case HLSL_CLASS_STRING:
-+                {
-+                    const char *string = var->default_values[i].string ? var->default_values[i].string : "";
-+                    size = strlen(string) + 1;
-+                    data = string;
-+                    break;
-+                }
-+                case HLSL_CLASS_TEXTURE:
-+                    size = 0;
-+                    break;
-+                case HLSL_CLASS_PIXEL_SHADER:
-+                case HLSL_CLASS_VERTEX_SHADER:
-+                    size = 0;
-+                    hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 shader objects initializers is not implemented.");
-+                    break;
-+                default:
-+                    vkd3d_unreachable();
-+            }
-+            id = fx->object_variable_count++;
--        return size;
-+            put_u32(&fx->unstructured, id);
-+            put_u32(buffer, id);
-+            put_u32(buffer, size);
-+            if (size)
-+                bytecode_put_bytes(buffer, data, size);
-+        }
-     }
--    return type->dimx * type->dimy * sizeof(float);
-+    return offset;
- }
- static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx)
- {
--    struct vkd3d_bytecode_buffer *buffer = &fx->unstructured;
--    const struct hlsl_type *type = var->data_type;
--    uint32_t offset, size, elements_count = 1;
--    size = get_fx_2_type_size(type);
--    if (type->class == HLSL_CLASS_ARRAY)
--    {
--        elements_count = hlsl_get_multiarray_size(type);
--        type = hlsl_get_multiarray_element_type(type);
--    }
-+    const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type);
-+    struct hlsl_ctx *ctx = fx->ctx;
-+    uint32_t offset;
-     /* Note that struct fields must all be numeric;
-      * this was validated in check_invalid_object_fields(). */
-@@ -1030,21 +1219,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f
-         case HLSL_CLASS_VECTOR:
-         case HLSL_CLASS_MATRIX:
-         case HLSL_CLASS_STRUCT:
--            /* FIXME: write actual initial value */
--            if (var->default_values)
--                hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n");
--            offset = put_u32(buffer, 0);
-+            offset = write_fx_2_default_value(var->data_type, var->default_values, fx);
-+            break;
--            for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i)
--                put_u32(buffer, 0);
-+        case HLSL_CLASS_SAMPLER:
-+        case HLSL_CLASS_TEXTURE:
-+        case HLSL_CLASS_STRING:
-+            offset = write_fx_2_object_initializer(var, fx);
-             break;
-         default:
--            /* Objects are given sequential ids. */
--            offset = put_u32(buffer, fx->object_variable_count++);
--            for (uint32_t i = 1; i < elements_count; ++i)
--                put_u32(buffer, fx->object_variable_count++);
-+            offset = 0;
-+            hlsl_fixme(ctx, &var->loc, "Writing initializer not implemented for parameter class %#x.", type->class);
-             break;
-     }
-@@ -1070,6 +1258,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type
-             return is_type_supported_fx_2(ctx, type->e.array.type, loc);
-         case HLSL_CLASS_TEXTURE:
-+        case HLSL_CLASS_SAMPLER:
-             switch (type->sampler_dim)
-             {
-                 case HLSL_SAMPLER_DIM_1D:
-@@ -1083,9 +1272,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type
-             }
-             break;
--        case HLSL_CLASS_SAMPLER:
-         case HLSL_CLASS_STRING:
-+            return true;
-             hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class);
-             return false;
-@@ -1104,10 +1294,12 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type
-             return false;
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_PASS:
-         case HLSL_CLASS_TECHNIQUE:
-         case HLSL_CLASS_NULL:
-             /* This cannot appear as an extern variable. */
-             break;
-     }
-@@ -1117,8 +1309,8 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type
- static void write_fx_2_parameters(struct fx_write_context *fx)
- {
-+    uint32_t desc_offset, value_offset, flags, annotation_count_offset;
-     struct vkd3d_bytecode_buffer *buffer = &fx->structured;
--    uint32_t desc_offset, value_offset, flags;
-     struct hlsl_ctx *ctx = fx->ctx;
-     struct hlsl_ir_var *var;
-     enum fx_2_parameter_flags
-@@ -1138,23 +1330,35 @@ static void write_fx_2_parameters(struct fx_write_context *fx)
-         if (var->storage_modifiers & HLSL_STORAGE_SHARED)
-             flags |= IS_SHARED;
--        put_u32(buffer, desc_offset); /* Parameter description */
--        put_u32(buffer, value_offset); /* Value */
--        put_u32(buffer, flags); /* Flags */
-+        put_u32(buffer, desc_offset);
-+        put_u32(buffer, value_offset);
-+        put_u32(buffer, flags);
--        put_u32(buffer, 0); /* Annotations count */
--        if (has_annotations(var))
--            hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented.");
-+        annotation_count_offset = put_u32(buffer, 0);
-+        write_fx_2_annotations(var, annotation_count_offset, fx);
-         ++fx->parameter_count;
-     }
- }
-+static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx)
-+    struct vkd3d_bytecode_buffer *buffer = &fx->structured;
-+    uint32_t desc_offset, value_offset;
-+    desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx);
-+    value_offset = write_fx_2_initial_value(var, fx);
-+    put_u32(buffer, desc_offset);
-+    put_u32(buffer, value_offset);
- static const struct fx_write_context_ops fx_2_ops =
- {
-     .write_string = write_fx_2_string,
-     .write_technique = write_fx_2_technique,
-     .write_pass = write_fx_2_pass,
-+    .write_annotation = write_fx_2_annotation,
- };
- static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out)
-@@ -1180,19 +1384,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out)
-     object_count = put_u32(structured, 0);
-     write_fx_2_parameters(&fx);
--    set_u32(structured, parameter_count, fx.parameter_count);
--    set_u32(structured, object_count, fx.object_variable_count);
-     write_techniques(ctx->globals, &fx);
--    set_u32(structured, technique_count, fx.technique_count);
--    set_u32(structured, shader_count, fx.shader_count);
--    put_u32(structured, 0); /* String count */
-+    put_u32(structured, fx.object_variable_count - 1);
-     put_u32(structured, 0); /* Resource count */
--    /* TODO: strings */
-+    bytecode_put_bytes(structured, fx.objects.data, fx.objects.size);
-     /* TODO: resources */
-+    set_u32(structured, parameter_count, fx.parameter_count);
-+    set_u32(structured, object_count, fx.object_variable_count);
-+    set_u32(structured, technique_count, fx.technique_count);
-+    set_u32(structured, shader_count, fx.shader_count);
-     size = align(fx.unstructured.size, 4);
-     set_u32(&buffer, offset, size);
-@@ -1201,6 +1404,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out)
-     vkd3d_free(fx.unstructured.data);
-     vkd3d_free(fx.structured.data);
-+    vkd3d_free(fx.objects.data);
-     if (!fx.technique_count)
-         hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found.");
-@@ -1252,6 +1456,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl
-                 switch (type->e.numeric.type)
-                 {
-                     case HLSL_TYPE_FLOAT:
-+                    case HLSL_TYPE_HALF:
-                     case HLSL_TYPE_INT:
-                     case HLSL_TYPE_UINT:
-                     case HLSL_TYPE_BOOL:
-@@ -1412,20 +1617,17 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s
-     for (i = 0; i < count; ++i)
-     {
--        if (hlsl_is_numeric_type(data_type))
-+        switch (data_type->e.numeric.type)
-         {
--            switch (data_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                case HLSL_TYPE_BOOL:
--                    type = fx_4_numeric_base_type[data_type->e.numeric.type];
--                    break;
--                default:
--                    type = 0;
--                    hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type);
--            }
-+            case HLSL_TYPE_FLOAT:
-+            case HLSL_TYPE_INT:
-+            case HLSL_TYPE_UINT:
-+            case HLSL_TYPE_BOOL:
-+                type = fx_4_numeric_base_types[data_type->e.numeric.type];
-+                break;
-+            default:
-+                type = 0;
-+                hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type);
-         }
-         put_u32_unaligned(buffer, type);
-@@ -1438,11 +1640,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s
- static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry,
-         struct fx_write_context *fx)
- {
--    uint32_t value_offset = 0, assignment_type = 0, rhs_offset;
--    uint32_t type_offset;
-+    uint32_t value_offset = 0, assignment_type = 0, rhs_offset, type_offset, offset;
-+    struct vkd3d_bytecode_buffer *unstructured = &fx->unstructured;
-     struct vkd3d_bytecode_buffer *buffer = &fx->structured;
--    struct hlsl_ctx *ctx = fx->ctx;
-     struct hlsl_ir_node *value = entry->args->node;
-+    struct hlsl_ctx *ctx = fx->ctx;
-+    struct hlsl_ir_var *index_var;
-+    struct hlsl_ir_constant *c;
-+    struct hlsl_ir_load *load;
-     put_u32(buffer, entry->name_id);
-     put_u32(buffer, entry->lhs_index);
-@@ -1453,21 +1658,77 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl
-     {
-         case HLSL_IR_CONSTANT:
-         {
--            struct hlsl_ir_constant *c = hlsl_ir_constant(value);
-+            c = hlsl_ir_constant(value);
-             value_offset = write_fx_4_state_numeric_value(c, fx);
--            assignment_type = 1;
-+            assignment_type = FX_4_ASSIGNMENT_CONSTANT;
-             break;
-         }
-         case HLSL_IR_LOAD:
-         {
--            struct hlsl_ir_load *l = hlsl_ir_load(value);
-+            load = hlsl_ir_load(value);
--            if (l->src.path_len)
-+            if (load->src.path_len)
-                 hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented.");
--            value_offset = write_fx_4_string(l->src.var->name, fx);
--            assignment_type = 2;
-+            value_offset = write_fx_4_string(load->src.var->name, fx);
-+            assignment_type = FX_4_ASSIGNMENT_VARIABLE;
-+            break;
-+        }
-+        case HLSL_IR_INDEX:
-+        {
-+            struct hlsl_ir_index *index = hlsl_ir_index(value);
-+            struct hlsl_ir_node *val = index->val.node;
-+            struct hlsl_ir_node *idx = index->idx.node;
-+            struct hlsl_type *type;
-+            if (val->type != HLSL_IR_LOAD)
-+            {
-+                hlsl_fixme(ctx, &var->loc, "Unexpected indexed RHS value type.");
-+                break;
-+            }
-+            load = hlsl_ir_load(val);
-+            value_offset = write_fx_4_string(load->src.var->name, fx);
-+            type = load->src.var->data_type;
-+            switch (idx->type)
-+            {
-+                case HLSL_IR_CONSTANT:
-+                {
-+                    c = hlsl_ir_constant(idx);
-+                    value_offset = put_u32(unstructured, value_offset);
-+                    put_u32(unstructured, c->value.u[0].u);
-+                    assignment_type = FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX;
-+                    if (c->value.u[0].u >= type->e.array.elements_count)
-+                        hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
-+                                "Array index %u exceeds array size %u.", c->value.u[0].u, type->e.array.elements_count);
-+                    break;
-+                }
-+                case HLSL_IR_LOAD:
-+                {
-+                    load = hlsl_ir_load(idx);
-+                    index_var = load->src.var;
-+                    /* Special case for uint index variables, for anything more complex use an expression. */
-+                    if (hlsl_types_are_equal(index_var->data_type, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT))
-+                            && !load->src.path_len)
-+                    {
-+                        offset = write_fx_4_string(index_var->name, fx);
-+                        value_offset = put_u32(unstructured, value_offset);
-+                        put_u32(unstructured, offset);
-+                        assignment_type = FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX;
-+                        break;
-+                    }
-+                }
-+                /* fall through */
-+                default:
-+                    hlsl_fixme(ctx, &var->loc, "Complex array index expressions in RHS values are not implemented.");
-+            }
-             break;
-         }
-         default:
-@@ -1575,6 +1836,7 @@ enum state_property_component_type
-     FX_BLEND,
- };
- static inline bool is_object_fx_type(enum state_property_component_type type)
-@@ -1645,230 +1907,227 @@ static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_com
-      }
- }
--static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry,
--        struct fx_write_context *fx)
--    static const struct rhs_named_value filter_values[] =
--    {
--        { "MIN_MAG_MIP_POINT", 0x00 },
--        { "MIN_MAG_POINT_MIP_LINEAR", 0x01 },
--        { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 },
--        { "MIN_POINT_MAG_MIP_LINEAR", 0x05 },
--        { "MIN_LINEAR_MAG_MIP_POINT", 0x10 },
--        { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 },
--        { "MIN_MAG_LINEAR_MIP_POINT", 0x14 },
--        { "MIN_MAG_MIP_LINEAR", 0x15 },
--        { "ANISOTROPIC", 0x55 },
--        { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 },
--        { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 },
--        { "COMPARISON_ANISOTROPIC", 0xd5 },
--        { NULL },
--    };
--    static const struct rhs_named_value address_values[] =
--    {
--        { "WRAP", 1 },
--        { "MIRROR", 2 },
--        { "CLAMP", 3 },
--        { "BORDER", 4 },
--        { "MIRROR_ONCE", 5 },
--        { NULL },
--    };
-+static const struct rhs_named_value filter_values[] =
-+    { "MIN_MAG_MIP_POINT", 0x00 },
-+    { "MIN_MAG_POINT_MIP_LINEAR", 0x01 },
-+    { "MIN_POINT_MAG_MIP_LINEAR", 0x05 },
-+    { "MIN_LINEAR_MAG_MIP_POINT", 0x10 },
-+    { "MIN_MAG_LINEAR_MIP_POINT", 0x14 },
-+    { "MIN_MAG_MIP_LINEAR", 0x15 },
-+    { "ANISOTROPIC", 0x55 },
-+    { NULL },
--    static const struct rhs_named_value compare_func_values[] =
--    {
--        { "NEVER",         1 },
--        { "LESS",          2 },
--        { "EQUAL",         3 },
--        { "LESS_EQUAL",    4 },
--        { "GREATER",       5 },
--        { "NOT_EQUAL",     6 },
--        { "GREATER_EQUAL", 7 },
--        { "ALWAYS",        8 },
--        { NULL }
--    };
-+static const struct rhs_named_value address_values[] =
-+    { "WRAP", 1 },
-+    { "MIRROR", 2 },
-+    { "CLAMP", 3 },
-+    { "BORDER", 4 },
-+    { "MIRROR_ONCE", 5 },
-+    { NULL },
--    static const struct rhs_named_value depth_write_mask_values[] =
--    {
--        { "ZERO", 0 },
--        { "ALL",  1 },
--        { NULL }
--    };
-+static const struct rhs_named_value compare_func_values[] =
-+    { "NEVER",         1 },
-+    { "LESS",          2 },
-+    { "EQUAL",         3 },
-+    { "LESS_EQUAL",    4 },
-+    { "GREATER",       5 },
-+    { "NOT_EQUAL",     6 },
-+    { "GREATER_EQUAL", 7 },
-+    { "ALWAYS",        8 },
-+    { NULL }
--    static const struct rhs_named_value comparison_values[] =
--    {
--        { "NEVER", 1 },
--        { "LESS",  2 },
--        { "EQUAL", 3 },
--        { "LESS_EQUAL", 4 },
--        { "GREATER", 5 },
--        { "NOT_EQUAL", 6 },
--        { "GREATER_EQUAL", 7 },
--        { "ALWAYS", 8 },
--        { NULL }
--    };
-+static const struct rhs_named_value depth_write_mask_values[] =
-+    { "ZERO", 0 },
-+    { "ALL",  1 },
-+    { NULL }
--    static const struct rhs_named_value stencil_op_values[] =
--    {
--        { "KEEP", 1 },
--        { "ZERO", 2 },
--        { "REPLACE", 3 },
--        { "INCR_SAT", 4 },
--        { "DECR_SAT", 5 },
--        { "INVERT", 6 },
--        { "INCR", 7 },
--        { "DECR", 8 },
--        { NULL }
--    };
-+static const struct rhs_named_value comparison_values[] =
-+    { "NEVER", 1 },
-+    { "LESS",  2 },
-+    { "EQUAL", 3 },
-+    { "LESS_EQUAL", 4 },
-+    { "GREATER", 5 },
-+    { "NOT_EQUAL", 6 },
-+    { "GREATER_EQUAL", 7 },
-+    { "ALWAYS", 8 },
-+    { NULL }
--    static const struct rhs_named_value fill_values[] =
--    {
--        { "WIREFRAME", 2 },
--        { "SOLID", 3 },
--        { NULL }
--    };
-+static const struct rhs_named_value stencil_op_values[] =
-+    { "KEEP", 1 },
-+    { "ZERO", 2 },
-+    { "REPLACE", 3 },
-+    { "INCR_SAT", 4 },
-+    { "DECR_SAT", 5 },
-+    { "INVERT", 6 },
-+    { "INCR", 7 },
-+    { "DECR", 8 },
-+    { NULL }
--    static const struct rhs_named_value cull_values[] =
--    {
--        { "NONE", 1 },
--        { "FRONT", 2 },
--        { "BACK", 3 },
--        { NULL }
--    };
-+static const struct rhs_named_value fill_values[] =
-+    { "WIREFRAME", 2 },
-+    { "SOLID", 3 },
-+    { NULL }
--    static const struct rhs_named_value blend_values[] =
--    {
--        { "ZERO", 1 },
--        { "ONE", 2 },
--        { "SRC_COLOR", 3 },
--        { "INV_SRC_COLOR", 4 },
--        { "SRC_ALPHA", 5 },
--        { "INV_SRC_ALPHA", 6 },
--        { "DEST_ALPHA", 7 },
--        { "INV_DEST_ALPHA", 8 },
--        { "DEST_COLOR", 9 },
--        { "INV_DEST_COLOR", 10 },
--        { "SRC_ALPHA_SAT", 11 },
--        { "BLEND_FACTOR", 14 },
--        { "INV_BLEND_FACTOR", 15 },
--        { "SRC1_COLOR", 16 },
--        { "INV_SRC1_COLOR", 17 },
--        { "SRC1_ALPHA", 18 },
--        { "INV_SRC1_ALPHA", 19 },
--        { NULL }
--    };
-+static const struct rhs_named_value cull_values[] =
-+    { "NONE", 1 },
-+    { "FRONT", 2 },
-+    { "BACK", 3 },
-+    { NULL }
--    static const struct rhs_named_value blendop_values[] =
--    {
--        { "ADD", 1 },
--        { "SUBTRACT", 2 },
--        { "REV_SUBTRACT", 3 },
--        { "MIN", 4 },
--        { "MAX", 5 },
--        { NULL }
--    };
-+static const struct rhs_named_value blend_values[] =
-+    { "ZERO", 1 },
-+    { "ONE", 2 },
-+    { "SRC_COLOR", 3 },
-+    { "INV_SRC_COLOR", 4 },
-+    { "SRC_ALPHA", 5 },
-+    { "INV_SRC_ALPHA", 6 },
-+    { "DEST_ALPHA", 7 },
-+    { "INV_DEST_ALPHA", 8 },
-+    { "DEST_COLOR", 9 },
-+    { "INV_DEST_COLOR", 10 },
-+    { "SRC_ALPHA_SAT", 11 },
-+    { "BLEND_FACTOR", 14 },
-+    { "INV_BLEND_FACTOR", 15 },
-+    { "SRC1_COLOR", 16 },
-+    { "INV_SRC1_COLOR", 17 },
-+    { "SRC1_ALPHA", 18 },
-+    { "INV_SRC1_ALPHA", 19 },
-+    { NULL }
--    static const struct rhs_named_value bool_values[] =
--    {
--        { "FALSE", 0 },
--        { "TRUE", 1 },
--        { NULL }
--    };
-+static const struct rhs_named_value blendop_values[] =
-+    { "ADD", 1 },
-+    { "SUBTRACT", 2 },
-+    { "REV_SUBTRACT", 3 },
-+    { "MIN", 4 },
-+    { "MAX", 5 },
-+    { NULL }
--    static const struct rhs_named_value null_values[] =
--    {
--        { "NULL", 0 },
--        { NULL }
--    };
-+static const struct rhs_named_value bool_values[] =
-+    { "FALSE", 0 },
-+    { "TRUE", 1 },
-+    { NULL }
--    static const struct state
--    {
--        const char *name;
--        enum hlsl_type_class container;
--        enum hlsl_type_class class;
--        enum state_property_component_type type;
--        unsigned int dimx;
--        unsigned int array_size;
--        uint32_t id;
--        const struct rhs_named_value *values;
--    }
--    states[] =
--    {
--        { "RasterizerState",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER,       1, 1, 0 },
--        { "DepthStencilState",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL,     1, 1, 1 },
--        { "BlendState",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND,            1, 1, 2 },
--        { "RenderTargetView",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 },
--        { "DepthStencilView",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 },
--        { "VertexShader",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER,     1, 1, 6 },
--        { "PixelShader",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER,      1, 1, 7 },
--        { "DS_StencilRef",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 9 },
--        { "AB_BlendFactor",        HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 },
--        { "AB_SampleMask",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 11 },
--        { "FillMode",              HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 12, fill_values },
--        { "CullMode",              HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 13, cull_values },
--        { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 14, bool_values },
--        { "DepthBias",             HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 15 },
--        { "DepthBiasClamp",        HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 },
--        { "SlopeScaledDepthBias",  HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 },
--        { "DepthClipEnable",       HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 18, bool_values },
--        { "ScissorEnable",         HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 19, bool_values },
--        { "MultisampleEnable",     HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 20, bool_values },
--        { "AntializedLineEnable",  HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 21, bool_values },
--        { "DepthEnable",               HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 22, bool_values },
--        { "DepthWriteMask",            HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 23, depth_write_mask_values },
--        { "DepthFunc",                 HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 24, comparison_values },
--        { "StencilEnable",             HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 25, bool_values },
--        { "StencilReadMask",           HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 },
--        { "StencilWriteMask",          HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 },
--        { "FrontFaceStencilFail",      HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 28, stencil_op_values },
--        { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 29, stencil_op_values },
--        { "FrontFaceStencilPass",      HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 30, stencil_op_values },
--        { "FrontFaceStencilFunc",      HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 31, comparison_values },
--        { "BackFaceStencilFail",       HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 32, stencil_op_values },
--        { "BackFaceStencilDepthFail",  HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 33, stencil_op_values },
--        { "BackFaceStencilPass",       HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 34, stencil_op_values },
--        { "BackFaceStencilFunc",       HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 35, comparison_values },
--        { "Filter",         HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 45, filter_values },
--        { "AddressU",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 46, address_values },
--        { "AddressV",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 47, address_values },
--        { "AddressW",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 48, address_values },
--        { "MipLODBias",     HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_FLOAT,   1, 1, 49 },
--        { "MaxAnisotropy",  HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 50 },
--        { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 51, compare_func_values },
--        { "BorderColor",    HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR,  FX_FLOAT,   4, 1, 52 },
--        { "MinLOD",         HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_FLOAT,   1, 1, 53 },
--        { "MaxLOD",         HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_FLOAT,   1, 1, 54 },
--        { "Texture",        HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_TEXTURE, 1, 1, 55, null_values },
--        { "HullShader",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER,    1, 1, 56 },
--        { "DomainShader",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER,  1, 1, 57 },
--        { "ComputeShader",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 },
--    };
-+static const struct rhs_named_value null_values[] =
-+    { "NULL", 0 },
-+    { NULL }
--    static const struct state fx_4_blend_states[] =
--    {
--        { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 36, bool_values },
--        { "BlendEnable",           HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 8, 37, bool_values },
--        { "SrcBlend",              HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 38, blend_values },
--        { "DestBlend",             HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 39, blend_values },
--        { "BlendOp",               HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 40, blendop_values },
--        { "SrcBlendAlpha",         HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 41, blend_values },
--        { "DestBlendAlpha",        HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 42, blend_values },
--        { "BlendOpAlpha",          HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 43, blendop_values },
--        { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 },
--    };
-+static const struct fx_4_state
-+    const char *name;
-+    enum hlsl_type_class container;
-+    enum hlsl_type_class class;
-+    enum state_property_component_type type;
-+    unsigned int dimx;
-+    unsigned int array_size;
-+    int id;
-+    const struct rhs_named_value *values;
-+fx_4_states[] =
-+    { "RasterizerState",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER,       1, 1, 0 },
-+    { "DepthStencilState",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL,     1, 1, 1 },
-+    { "BlendState",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND,            1, 1, 2 },
-+    { "RenderTargetView",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 },
-+    { "DepthStencilView",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 },
-+    { "VertexShader",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER,     1, 1, 6 },
-+    { "PixelShader",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER,      1, 1, 7 },
-+    { "DS_StencilRef",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 9 },
-+    { "AB_BlendFactor",        HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 },
-+    { "AB_SampleMask",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 11 },
-+    { "FillMode",              HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 12, fill_values },
-+    { "CullMode",              HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 13, cull_values },
-+    { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 14, bool_values },
-+    { "DepthBias",             HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 15 },
-+    { "DepthBiasClamp",        HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 },
-+    { "SlopeScaledDepthBias",  HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 },
-+    { "DepthClipEnable",       HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 18, bool_values },
-+    { "ScissorEnable",         HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 19, bool_values },
-+    { "MultisampleEnable",     HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 20, bool_values },
-+    { "AntializedLineEnable",  HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 21, bool_values },
-+    { "DepthEnable",               HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 22, bool_values },
-+    { "DepthWriteMask",            HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 23, depth_write_mask_values },
-+    { "DepthFunc",                 HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 24, comparison_values },
-+    { "StencilEnable",             HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 25, bool_values },
-+    { "StencilReadMask",           HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 },
-+    { "StencilWriteMask",          HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 },
-+    { "FrontFaceStencilFail",      HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 28, stencil_op_values },
-+    { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 29, stencil_op_values },
-+    { "FrontFaceStencilPass",      HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 30, stencil_op_values },
-+    { "FrontFaceStencilFunc",      HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 31, comparison_values },
-+    { "BackFaceStencilFail",       HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 32, stencil_op_values },
-+    { "BackFaceStencilDepthFail",  HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 33, stencil_op_values },
-+    { "BackFaceStencilPass",       HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 34, stencil_op_values },
-+    { "BackFaceStencilFunc",       HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 35, comparison_values },
-+    { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 36, bool_values },
-+    { "BlendEnable",           HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 8, 37, bool_values },
-+    { "SrcBlend",              HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 38, blend_values },
-+    { "DestBlend",             HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 39, blend_values },
-+    { "BlendOp",               HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 40, blendop_values },
-+    { "SrcBlendAlpha",         HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 41, blend_values },
-+    { "DestBlendAlpha",        HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 42, blend_values },
-+    { "BlendOpAlpha",          HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 43, blendop_values },
-+    { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 },
-+    { "Filter",         HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 45, filter_values },
-+    { "AddressU",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 46, address_values },
-+    { "AddressV",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 47, address_values },
-+    { "AddressW",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 48, address_values },
-+    { "MipLODBias",     HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_FLOAT,   1, 1, 49 },
-+    { "MaxAnisotropy",  HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 50 },
-+    { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_UINT,    1, 1, 51, compare_func_values },
-+    { "BorderColor",    HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR,  FX_FLOAT,   4, 1, 52 },
-+    { "MinLOD",         HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_FLOAT,   1, 1, 53 },
-+    { "MaxLOD",         HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_FLOAT,   1, 1, 54 },
-+    { "Texture",        HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR,  FX_TEXTURE, 1, 1, 55, null_values },
-+    { "HullShader",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER,    1, 1, 56 },
-+    { "DomainShader",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER,  1, 1, 57 },
-+    { "ComputeShader",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 },
--    static const struct state fx_5_blend_states[] =
-+static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry,
-+        struct fx_write_context *fx)
-+    static const struct fx_4_state fx_5_blend_states[] =
-     {
-         { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 36, bool_values },
-         { "BlendEnable",           HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 8, 37, bool_values },
-@@ -1883,36 +2142,28 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl
-     struct state_table
-     {
--        const struct state *ptr;
-+        const struct fx_4_state *ptr;
-         unsigned int count;
-     } table;
-     const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type);
-     struct replace_state_context replace_context;
-+    const struct fx_4_state *state = NULL;
-     struct hlsl_type *state_type = NULL;
-     struct hlsl_ir_node *node, *cast;
--    const struct state *state = NULL;
-     struct hlsl_ctx *ctx = fx->ctx;
-     enum hlsl_base_type base_type;
-     unsigned int i;
--    if (type->class == HLSL_CLASS_BLEND_STATE)
-+    if (type->class == HLSL_CLASS_BLEND_STATE && ctx->profile->major_version == 5)
-     {
--        if (ctx->profile->major_version == 4)
--        {
--            table.ptr = fx_4_blend_states;
--            table.count = ARRAY_SIZE(fx_4_blend_states);
--        }
--        else
--        {
--            table.ptr = fx_5_blend_states;
--            table.count = ARRAY_SIZE(fx_5_blend_states);
--        }
-+        table.ptr = fx_5_blend_states;
-+        table.count = ARRAY_SIZE(fx_5_blend_states);
-     }
-     else
-     {
--        table.ptr = states;
--        table.count = ARRAY_SIZE(states);
-+        table.ptr = fx_4_states;
-+        table.count = ARRAY_SIZE(fx_4_states);
-     }
-     for (i = 0; i < table.count; ++i)
-@@ -2118,7 +2369,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var,
-         const struct function_component *comp = &components[i];
-         unsigned int arg_index = (i + 1) % entry->args_count;
-         block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name,
--                comp->lhs_has_index, comp->lhs_index, arg_index);
-+                comp->lhs_has_index, comp->lhs_index, true, arg_index);
-     }
-     hlsl_free_state_block_entry(entry);
-@@ -2126,7 +2377,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var,
- }
- /* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState
--   object, and only when fx_5_0 profile is used. */
-+   object, and only when fx_4_1 or fx_5_0 profile is used. */
- static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block,
-         unsigned int entry_index, struct fx_write_context *fx)
- {
-@@ -2140,7 +2391,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *
-     if (type->class != HLSL_CLASS_BLEND_STATE)
-         return 1;
--    if (ctx->profile->major_version != 5)
-+    if (hlsl_version_lt(ctx, 4, 1))
-         return 1;
-     if (entry->lhs_has_index)
-         return 1;
-@@ -2164,7 +2415,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *
-     for (i = 1; i < array_size; ++i)
-     {
-         block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry,
--                entry->name, true, i, 0);
-+                entry->name, true, i, true, 0);
-     }
-     return array_size;
-@@ -2401,6 +2652,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx
-     size = 0;
-     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-     {
-+        if (!is_numeric_fx_4_type(var->data_type))
-+            continue;
-         if (var->buffer != b)
-             continue;
-@@ -2629,3 +2883,949 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out)
-         vkd3d_unreachable();
-     }
- }
-+struct fx_parser
-+    const uint8_t *ptr, *start, *end;
-+    struct vkd3d_shader_message_context *message_context;
-+    struct vkd3d_string_buffer buffer;
-+    unsigned int indent;
-+    unsigned int version;
-+    struct
-+    {
-+        const uint8_t *ptr;
-+        const uint8_t *end;
-+        uint32_t size;
-+    } unstructured;
-+    uint32_t buffer_count;
-+    uint32_t object_count;
-+    uint32_t group_count;
-+    bool failed;
-+static uint32_t fx_parser_read_u32(struct fx_parser *parser)
-+    uint32_t ret;
-+    if ((parser->end - parser->ptr) < sizeof(uint32_t))
-+    {
-+        parser->failed = true;
-+        return 0;
-+    }
-+    ret = *(uint32_t *)parser->ptr;
-+    parser->ptr += sizeof(uint32_t);
-+    return ret;
-+static void fx_parser_read_u32s(struct fx_parser *parser, void *dst, size_t size)
-+    uint32_t *ptr = dst;
-+    size_t i;
-+    for (i = 0; i < size / sizeof(uint32_t); ++i)
-+        ptr[i] = fx_parser_read_u32(parser);
-+static void fx_parser_skip(struct fx_parser *parser, size_t size)
-+    if ((parser->end - parser->ptr) < size)
-+    {
-+        parser->ptr = parser->end;
-+        parser->failed = true;
-+        return;
-+    }
-+    parser->ptr += size;
-+static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, enum vkd3d_shader_error error,
-+        const char *format, ...)
-+    va_list args;
-+    va_start(args, format);
-+    vkd3d_shader_verror(parser->message_context, NULL, error, format, args);
-+    va_end(args);
-+    parser->failed = true;
-+static int fx_2_parse(struct fx_parser *parser)
-+    fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n");
-+    return -1;
-+static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint32_t offset, size_t size)
-+    const uint8_t *ptr = parser->unstructured.ptr;
-+    if (offset >= parser->unstructured.size
-+            || size > parser->unstructured.size - offset)
-+    {
-+        parser->failed = true;
-+        return NULL;
-+    }
-+    return &ptr[offset];
-+static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size)
-+    const uint8_t *ptr;
-+    memset(dst, 0, size);
-+    if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size)))
-+        return;
-+    memcpy(dst, ptr, size);
-+static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset)
-+    const uint8_t *ptr = parser->unstructured.ptr;
-+    const uint8_t *end = parser->unstructured.end;
-+    if (offset >= parser->unstructured.size)
-+    {
-+        parser->failed = true;
-+        return "<invalid>";
-+    }
-+    ptr += offset;
-+    while (ptr < end && *ptr)
-+        ++ptr;
-+    if (*ptr)
-+    {
-+        parser->failed = true;
-+        return "<invalid>";
-+    }
-+    return (const char *)(parser->unstructured.ptr + offset);
-+static void parse_fx_start_indent(struct fx_parser *parser)
-+    ++parser->indent;
-+static void parse_fx_end_indent(struct fx_parser *parser)
-+    --parser->indent;
-+static void parse_fx_print_indent(struct fx_parser *parser)
-+    vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, "");
-+static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset,
-+        const struct fx_4_binary_type *type)
-+    unsigned int base_type, comp_count;
-+    size_t i;
-+    base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf;
-+    comp_count = type->packed_size / sizeof(uint32_t);
-+    for (i = 0; i < comp_count; ++i)
-+    {
-+        union hlsl_constant_value_component value;
-+        fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t));
-+        if (base_type == FX_4_NUMERIC_TYPE_FLOAT)
-+            vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f);
-+        else if (base_type == FX_4_NUMERIC_TYPE_INT)
-+            vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i);
-+        else if (base_type == FX_4_NUMERIC_TYPE_UINT)
-+            vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u);
-+        else if (base_type == FX_4_NUMERIC_TYPE_BOOL)
-+            vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" );
-+        else
-+            vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u);
-+        if (i < comp_count - 1)
-+            vkd3d_string_buffer_printf(&parser->buffer, ", ");
-+        offset += sizeof(uint32_t);
-+    }
-+static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset)
-+    const char *str = fx_4_get_string(parser, offset);
-+    vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str);
-+static void fx_parse_fx_4_annotations(struct fx_parser *parser)
-+    struct fx_4_annotation
-+    {
-+        uint32_t name;
-+        uint32_t type;
-+    } var;
-+    struct fx_4_binary_type type;
-+    const char *name, *type_name;
-+    uint32_t count, i, value;
-+    if (parser->failed)
-+        return;
-+    count = fx_parser_read_u32(parser);
-+    if (!count)
-+        return;
-+    vkd3d_string_buffer_printf(&parser->buffer, "\n");
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "<\n");
-+    parse_fx_start_indent(parser);
-+    for (i = 0; i < count; ++i)
-+    {
-+        fx_parser_read_u32s(parser, &var, sizeof(var));
-+        fx_parser_read_unstructured(parser, &type, var.type, sizeof(type));
-+        name = fx_4_get_string(parser, var.name);
-+        type_name = fx_4_get_string(parser, type.name);
-+        parse_fx_print_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name);
-+        if (type.element_count)
-+            vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count);
-+        vkd3d_string_buffer_printf(&parser->buffer, " = ");
-+        if (type.element_count)
-+            vkd3d_string_buffer_printf(&parser->buffer, "{ ");
-+        if (type.class == FX_4_TYPE_CLASS_NUMERIC)
-+        {
-+            value = fx_parser_read_u32(parser);
-+            parse_fx_4_numeric_value(parser, value, &type);
-+        }
-+        else if (type.class == FX_4_TYPE_CLASS_OBJECT && type.typeinfo == FX_4_OBJECT_TYPE_STRING)
-+        {
-+            uint32_t element_count = max(type.element_count, 1);
-+            for (uint32_t j = 0; j < element_count; ++j)
-+            {
-+                value = fx_parser_read_u32(parser);
-+                fx_4_parse_string_initializer(parser, value);
-+                if (j < element_count - 1)
-+                    vkd3d_string_buffer_printf(&parser->buffer, ", ");
-+            }
-+        }
-+        else
-+        {
-+            fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA,
-+                    "Only numeric and string types are supported in annotations.\n");
-+        }
-+        if (type.element_count)
-+            vkd3d_string_buffer_printf(&parser->buffer, " }");
-+        vkd3d_string_buffer_printf(&parser->buffer, ";\n");
-+    }
-+    parse_fx_end_indent(parser);
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, ">");
-+static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count)
-+    struct fx_4_numeric_variable
-+    {
-+        uint32_t name;
-+        uint32_t type;
-+        uint32_t semantic;
-+        uint32_t offset;
-+        uint32_t value;
-+        uint32_t flags;
-+    } var;
-+    const char *name, *semantic, *type_name;
-+    struct fx_4_binary_type type;
-+    uint32_t i;
-+    for (i = 0; i < count; ++i)
-+    {
-+        fx_parser_read_u32s(parser, &var, sizeof(var));
-+        fx_parser_read_unstructured(parser, &type, var.type, sizeof(type));
-+        name = fx_4_get_string(parser, var.name);
-+        type_name = fx_4_get_string(parser, type.name);
-+        vkd3d_string_buffer_printf(&parser->buffer, "    %s %s", type_name, name);
-+        if (type.element_count)
-+            vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count);
-+        if (var.semantic)
-+        {
-+            semantic = fx_4_get_string(parser, var.semantic);
-+            vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic);
-+        }
-+        fx_parse_fx_4_annotations(parser);
-+        if (var.value)
-+        {
-+            vkd3d_string_buffer_printf(&parser->buffer, " = { ");
-+            parse_fx_4_numeric_value(parser, var.value, &type);
-+            vkd3d_string_buffer_printf(&parser->buffer, " }");
-+        }
-+        vkd3d_string_buffer_printf(&parser->buffer, ";    // Offset: %u, size %u.\n", var.offset, type.unpacked_size);
-+    }
-+static void fx_parse_buffers(struct fx_parser *parser)
-+    struct fx_buffer
-+    {
-+        uint32_t name;
-+        uint32_t size;
-+        uint32_t flags;
-+        uint32_t count;
-+        uint32_t bind_point;
-+    } buffer;
-+    const char *name;
-+    uint32_t i;
-+    if (parser->failed)
-+        return;
-+    for (i = 0; i < parser->buffer_count; ++i)
-+    {
-+        fx_parser_read_u32s(parser, &buffer, sizeof(buffer));
-+        name = fx_4_get_string(parser, buffer.name);
-+        vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s", name);
-+        fx_parse_fx_4_annotations(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "\n{\n");
-+        parse_fx_start_indent(parser);
-+        fx_parse_fx_4_numeric_variables(parser, buffer.count);
-+        parse_fx_end_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "}\n\n");
-+    }
-+static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type)
-+    struct vkd3d_shader_compile_info info = { 0 };
-+    struct vkd3d_shader_code output;
-+    uint32_t data_size, offset;
-+    const void *data = NULL;
-+    const char *p, *q, *end;
-+    struct fx_5_shader
-+    {
-+        uint32_t offset;
-+        uint32_t sodecl[4];
-+        uint32_t sodecl_count;
-+        uint32_t rast_stream;
-+        uint32_t iface_bindings_count;
-+        uint32_t iface_bindings;
-+    } shader5;
-+    struct fx_4_gs_so
-+    {
-+        uint32_t offset;
-+        uint32_t sodecl;
-+    } gs_so;
-+    int ret;
-+    static const struct vkd3d_shader_compile_option options[] =
-+    {
-+    };
-+    switch (object_type)
-+    {
-+            offset = fx_parser_read_u32(parser);
-+            break;
-+            fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so));
-+            offset = gs_so.offset;
-+            break;
-+        case FX_5_OBJECT_TYPE_HULL_SHADER:
-+            fx_parser_read_u32s(parser, &shader5, sizeof(shader5));
-+            offset = shader5.offset;
-+            break;
-+        default:
-+            parser->failed = true;
-+            return;
-+    }
-+    fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size));
-+    if (data_size)
-+        data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size);
-+    if (!data)
-+        return;
-+    info.source.code = data;
-+    info.source.size = data_size;
-+    info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF;
-+    info.target_type = VKD3D_SHADER_TARGET_D3D_ASM;
-+    info.options = options;
-+    info.option_count = ARRAY_SIZE(options);
-+    info.log_level = VKD3D_SHADER_LOG_INFO;
-+    if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0)
-+    {
-+        fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA,
-+                "Failed to disassemble shader blob.\n");
-+        return;
-+    }
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "asm {\n");
-+    parse_fx_start_indent(parser);
-+    end = (const char *)output.code + output.size;
-+    for (p = output.code; p < end; p = q)
-+    {
-+        if (!(q = memchr(p, '\n', end - p)))
-+            q = end;
-+        else
-+            ++q;
-+        parse_fx_print_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p);
-+    }
-+    parse_fx_end_indent(parser);
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "}");
-+    if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl)
-+    {
-+        vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */",
-+                fx_4_get_string(parser, gs_so.sodecl));
-+    }
-+    else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER)
-+    {
-+        for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i)
-+        {
-+           if (shader5.sodecl[i])
-+               vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */",
-+                       i, fx_4_get_string(parser, shader5.sodecl[i]));
-+        }
-+        if (shader5.sodecl_count)
-+            vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream);
-+    }
-+    vkd3d_shader_free_shader_code(&output);
-+static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type)
-+    switch (type->typeinfo)
-+    {
-+        case FX_4_OBJECT_TYPE_STRING:
-+        case FX_4_OBJECT_TYPE_BLEND_STATE:
-+        case FX_5_OBJECT_TYPE_HULL_SHADER:
-+            return true;
-+        default:
-+            return false;
-+    }
-+static int fx_4_state_id_compare(const void *a, const void *b)
-+    const struct fx_4_state *state = b;
-+    int id = *(int *)a;
-+    return id - state->id;
-+static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32_t count,
-+        enum hlsl_type_class type_class)
-+    struct fx_4_assignment
-+    {
-+        uint32_t id;
-+        uint32_t lhs_index;
-+        uint32_t type;
-+        uint32_t value;
-+    } entry;
-+    struct
-+    {
-+        uint32_t name;
-+        uint32_t index;
-+    } index;
-+    struct
-+    {
-+        uint32_t type;
-+        union
-+        {
-+            uint32_t u;
-+            float f;
-+        };
-+    } value;
-+    static const char *value_types[FX_COMPONENT_TYPE_COUNT] =
-+    {
-+        [FX_BOOL]  = "bool",
-+        [FX_FLOAT] = "float",
-+        [FX_UINT]  = "uint",
-+        [FX_UINT8] = "byte",
-+    };
-+    const struct rhs_named_value *named_value;
-+    uint32_t i, j, comp_count;
-+    struct fx_4_state *state;
-+    for (i = 0; i < count; ++i)
-+    {
-+        fx_parser_read_u32s(parser, &entry, sizeof(entry));
-+        if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states),
-+                sizeof(*fx_4_states), fx_4_state_id_compare)))
-+        {
-+            fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id);
-+            break;
-+        }
-+        if (state->container != type_class)
-+        {
-+            fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA,
-+                    "State '%s' does not belong to object type class %#x.", state->name, type_class);
-+            break;
-+        }
-+        parse_fx_print_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "%s", state->name);
-+        if (state->array_size > 1)
-+            vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry.lhs_index);
-+        vkd3d_string_buffer_printf(&parser->buffer, " = ");
-+        switch (entry.type)
-+        {
-+            case FX_4_ASSIGNMENT_CONSTANT:
-+                if (value_types[state->type])
-+                    vkd3d_string_buffer_printf(&parser->buffer, "%s", value_types[state->type]);
-+                if (state->dimx > 1)
-+                    vkd3d_string_buffer_printf(&parser->buffer, "%u", state->dimx);
-+                vkd3d_string_buffer_printf(&parser->buffer, "(");
-+                fx_parser_read_unstructured(parser, &comp_count, entry.value, sizeof(uint32_t));
-+                named_value = NULL;
-+                if (comp_count == 1 && state->values && (state->type == FX_UINT || state->type == FX_BOOL))
-+                {
-+                    const struct rhs_named_value *ptr = state->values;
-+                    fx_parser_read_unstructured(parser, &value, entry.value + 4, sizeof(value));
-+                    while (ptr->name)
-+                    {
-+                        if (value.u == ptr->value)
-+                        {
-+                            named_value = ptr;
-+                            break;
-+                        }
-+                        ++ptr;
-+                    }
-+                }
-+                if (named_value)
-+                {
-+                    vkd3d_string_buffer_printf(&parser->buffer, "%s /* %u */", named_value->name, named_value->value);
-+                }
-+                else
-+                {
-+                    uint32_t offset = entry.value + 4;
-+                    for (j = 0; j < comp_count; ++j, offset += sizeof(value))
-+                    {
-+                        fx_parser_read_unstructured(parser, &value, offset, sizeof(value));
-+                        if (state->type == FX_UINT8)
-+                            vkd3d_string_buffer_printf(&parser->buffer, "0x%.2x", value.u);
-+                        else if (state->type == FX_UINT)
-+                            vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u);
-+                        else if (state->type == FX_FLOAT)
-+                            vkd3d_string_buffer_printf(&parser->buffer, "%g", value.f);
-+                        if (comp_count > 1 && j < comp_count - 1)
-+                            vkd3d_string_buffer_printf(&parser->buffer, ", ");
-+                    }
-+                }
-+                vkd3d_string_buffer_printf(&parser->buffer, ")");
-+                break;
-+            case FX_4_ASSIGNMENT_VARIABLE:
-+                vkd3d_string_buffer_printf(&parser->buffer, "%s", fx_4_get_string(parser, entry.value));
-+                break;
-+                fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index));
-+                vkd3d_string_buffer_printf(&parser->buffer, "%s[%u]", fx_4_get_string(parser, index.name), index.index);
-+                break;
-+                fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index));
-+                vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name),
-+                        fx_4_get_string(parser, index.index));
-+                break;
-+            default:
-+                fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED,
-+                        "Unsupported assignment type %u.\n", entry.type);
-+        }
-+        vkd3d_string_buffer_printf(&parser->buffer, ";\n");
-+    }
-+static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct fx_4_binary_type *type)
-+    static const enum hlsl_type_class type_classes[] =
-+    {
-+    };
-+    unsigned int i, element_count, count;
-+    uint32_t value;
-+    if (!fx_4_object_has_initializer(type))
-+        return;
-+    vkd3d_string_buffer_printf(&parser->buffer, " = {\n");
-+    element_count = max(type->element_count, 1);
-+    for (i = 0; i < element_count; ++i)
-+    {
-+        switch (type->typeinfo)
-+        {
-+            case FX_4_OBJECT_TYPE_STRING:
-+                vkd3d_string_buffer_printf(&parser->buffer, "    ");
-+                value = fx_parser_read_u32(parser);
-+                fx_4_parse_string_initializer(parser, value);
-+                break;
-+            case FX_4_OBJECT_TYPE_BLEND_STATE:
-+            case FX_4_OBJECT_TYPE_SAMPLER_STATE:
-+                count = fx_parser_read_u32(parser);
-+                parse_fx_start_indent(parser);
-+                fx_4_parse_state_object_initializer(parser, count, type_classes[type->typeinfo]);
-+                parse_fx_end_indent(parser);
-+                break;
-+            case FX_4_OBJECT_TYPE_PIXEL_SHADER:
-+            case FX_4_OBJECT_TYPE_VERTEX_SHADER:
-+            case FX_4_OBJECT_TYPE_GEOMETRY_SHADER:
-+            case FX_5_OBJECT_TYPE_GEOMETRY_SHADER:
-+            case FX_5_OBJECT_TYPE_COMPUTE_SHADER:
-+            case FX_5_OBJECT_TYPE_HULL_SHADER:
-+            case FX_5_OBJECT_TYPE_DOMAIN_SHADER:
-+                parse_fx_start_indent(parser);
-+                fx_4_parse_shader_initializer(parser, type->typeinfo);
-+                parse_fx_end_indent(parser);
-+                break;
-+            default:
-+                fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED,
-+                        "Parsing object type %u is not implemented.", type->typeinfo);
-+                return;
-+        }
-+        vkd3d_string_buffer_printf(&parser->buffer, ",\n");
-+    }
-+    vkd3d_string_buffer_printf(&parser->buffer, "}");
-+static void fx_4_parse_objects(struct fx_parser *parser)
-+    struct fx_4_object_variable
-+    {
-+        uint32_t name;
-+        uint32_t type;
-+        uint32_t semantic;
-+        uint32_t bind_point;
-+    } var;
-+    struct fx_4_binary_type type;
-+    const char *name, *type_name;
-+    uint32_t i;
-+    if (parser->failed)
-+        return;
-+    for (i = 0; i < parser->object_count; ++i)
-+    {
-+        if (parser->failed)
-+            return;
-+        fx_parser_read_u32s(parser, &var, sizeof(var));
-+        fx_parser_read_unstructured(parser, &type, var.type, sizeof(type));
-+        name = fx_4_get_string(parser, var.name);
-+        type_name = fx_4_get_string(parser, type.name);
-+        vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name);
-+        if (type.element_count)
-+            vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count);
-+        fx_4_parse_object_initializer(parser, &type);
-+        vkd3d_string_buffer_printf(&parser->buffer, ";\n");
-+        fx_parse_fx_4_annotations(parser);
-+    }
-+static void fx_parse_fx_4_technique(struct fx_parser *parser)
-+    struct fx_technique
-+    {
-+        uint32_t name;
-+        uint32_t count;
-+    } technique;
-+    struct fx_pass
-+    {
-+        uint32_t name;
-+        uint32_t count;
-+    } pass;
-+    const char *name;
-+    uint32_t i;
-+    if (parser->failed)
-+        return;
-+    fx_parser_read_u32s(parser, &technique, sizeof(technique));
-+    name = fx_4_get_string(parser, technique.name);
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "technique%u %s", parser->version, name);
-+    fx_parse_fx_4_annotations(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "\n");
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "{\n");
-+    parse_fx_start_indent(parser);
-+    for (i = 0; i < technique.count; ++i)
-+    {
-+        fx_parser_read_u32s(parser, &pass, sizeof(pass));
-+        name = fx_4_get_string(parser, pass.name);
-+        parse_fx_print_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name);
-+        fx_parse_fx_4_annotations(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "\n");
-+        parse_fx_print_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "{\n");
-+        parse_fx_start_indent(parser);
-+        fx_4_parse_state_object_initializer(parser, pass.count, HLSL_CLASS_PASS);
-+        parse_fx_end_indent(parser);
-+        parse_fx_print_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "}\n\n");
-+    }
-+    parse_fx_end_indent(parser);
-+    parse_fx_print_indent(parser);
-+    vkd3d_string_buffer_printf(&parser->buffer, "}\n\n");
-+static void fx_parse_groups(struct fx_parser *parser)
-+    struct fx_group
-+    {
-+        uint32_t name;
-+        uint32_t count;
-+    } group;
-+    const char *name;
-+    uint32_t i, j;
-+    if (parser->failed)
-+        return;
-+    for (i = 0; i < parser->group_count; ++i)
-+    {
-+        fx_parser_read_u32s(parser, &group, sizeof(group));
-+        name = fx_4_get_string(parser, group.name);
-+        vkd3d_string_buffer_printf(&parser->buffer, "fxgroup %s", name);
-+        fx_parse_fx_4_annotations(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "\n{\n");
-+        parse_fx_start_indent(parser);
-+        for (j = 0; j < group.count; ++j)
-+            fx_parse_fx_4_technique(parser);
-+        parse_fx_end_indent(parser);
-+        vkd3d_string_buffer_printf(&parser->buffer, "}\n\n");
-+    }
-+static int fx_4_parse(struct fx_parser *parser)
-+    struct fx_4_header
-+    {
-+        uint32_t version;
-+        uint32_t buffer_count;
-+        uint32_t numeric_variable_count;
-+        uint32_t object_count;
-+        uint32_t shared_buffer_count;
-+        uint32_t shared_numeric_variable_count;
-+        uint32_t shared_object_count;
-+        uint32_t technique_count;
-+        uint32_t unstructured_size;
-+        uint32_t string_count;
-+        uint32_t texture_count;
-+        uint32_t depth_stencil_state_count;
-+        uint32_t blend_state_count;
-+        uint32_t rasterizer_state_count;
-+        uint32_t sampler_state_count;
-+        uint32_t rtv_count;
-+        uint32_t dsv_count;
-+        uint32_t shader_count;
-+        uint32_t inline_shader_count;
-+    } header;
-+    uint32_t i;
-+    parser->version = 10;
-+    fx_parser_read_u32s(parser, &header, sizeof(header));
-+    parser->buffer_count = header.buffer_count;
-+    parser->object_count = header.object_count;
-+    if (parser->end - parser->ptr < header.unstructured_size)
-+    {
-+        parser->failed = true;
-+        return -1;
-+    }
-+    parser->unstructured.ptr = parser->ptr;
-+    parser->unstructured.end = parser->ptr + header.unstructured_size;
-+    parser->unstructured.size = header.unstructured_size;
-+    fx_parser_skip(parser, header.unstructured_size);
-+    fx_parse_buffers(parser);
-+    fx_4_parse_objects(parser);
-+    for (i = 0; i < header.technique_count; ++i)
-+        fx_parse_fx_4_technique(parser);
-+    return parser->failed ? - 1 : 0;
-+static int fx_5_parse(struct fx_parser *parser)
-+    struct fx_5_header
-+    {
-+        uint32_t version;
-+        uint32_t buffer_count;
-+        uint32_t numeric_variable_count;
-+        uint32_t object_count;
-+        uint32_t shared_buffer_count;
-+        uint32_t shared_numeric_variable_count;
-+        uint32_t shared_object_count;
-+        uint32_t technique_count;
-+        uint32_t unstructured_size;
-+        uint32_t string_count;
-+        uint32_t texture_count;
-+        uint32_t depth_stencil_state_count;
-+        uint32_t blend_state_count;
-+        uint32_t rasterizer_state_count;
-+        uint32_t sampler_state_count;
-+        uint32_t rtv_count;
-+        uint32_t dsv_count;
-+        uint32_t shader_count;
-+        uint32_t inline_shader_count;
-+        uint32_t group_count;
-+        uint32_t uav_count;
-+        uint32_t interface_variable_count;
-+        uint32_t interface_variable_element_count;
-+        uint32_t class_instance_element_count;
-+    } header;
-+    parser->version = 11;
-+    fx_parser_read_u32s(parser, &header, sizeof(header));
-+    parser->buffer_count = header.buffer_count;
-+    parser->object_count = header.object_count;
-+    parser->group_count = header.group_count;
-+    if (parser->end - parser->ptr < header.unstructured_size)
-+    {
-+        parser->failed = true;
-+        return -1;
-+    }
-+    parser->unstructured.ptr = parser->ptr;
-+    parser->unstructured.end = parser->ptr + header.unstructured_size;
-+    parser->unstructured.size = header.unstructured_size;
-+    fx_parser_skip(parser, header.unstructured_size);
-+    fx_parse_buffers(parser);
-+    fx_4_parse_objects(parser);
-+    fx_parse_groups(parser);
-+    return parser->failed ? - 1 : 0;
-+int fx_parse(const struct vkd3d_shader_compile_info *compile_info,
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
-+    struct fx_parser parser =
-+    {
-+        .start = compile_info->source.code,
-+        .ptr = compile_info->source.code,
-+        .end = (uint8_t *)compile_info->source.code + compile_info->source.size,
-+        .message_context = message_context,
-+    };
-+    uint32_t version;
-+    int ret;
-+    vkd3d_string_buffer_init(&parser.buffer);
-+    if (parser.end - parser.start < sizeof(version))
-+        return -1;
-+    version = *(uint32_t *)parser.ptr;
-+    switch (version)
-+    {
-+        case 0xfeff0901:
-+            ret = fx_2_parse(&parser);
-+            break;
-+        case 0xfeff1001:
-+        case 0xfeff1011:
-+            ret = fx_4_parse(&parser);
-+            break;
-+        case 0xfeff2001:
-+            ret = fx_5_parse(&parser);
-+            break;
-+        default:
-+            fx_parser_error(&parser, VKD3D_SHADER_ERROR_FX_INVALID_VERSION,
-+                    "Invalid effect binary version value 0x%08x.", version);
-+            ret = -1;
-+    }
-+    vkd3d_shader_code_from_string_buffer(out, &parser.buffer);
-+    return ret;
-diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
-index d1f02ab568b..0df0e30f399 100644
---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
-@@ -18,6 +18,19 @@
- #include "vkd3d_shader_private.h"
-+struct glsl_resource_type_info
-+    /* The number of coordinates needed to sample the resource type. */
-+    size_t coord_size;
-+    /* Whether the resource type is an array type. */
-+    bool array;
-+    /* Whether the resource type has a shadow/comparison variant. */
-+    bool shadow;
-+    /* The type suffix for resource type. I.e., the "2D" part of "usampler2D"
-+     * or "iimage2D". */
-+    const char *type_suffix;
- struct glsl_src
- {
-     struct vkd3d_string_buffer *str;
-@@ -38,9 +51,26 @@ struct vkd3d_glsl_generator
-     struct vkd3d_shader_location location;
-     struct vkd3d_shader_message_context *message_context;
-     unsigned int indent;
-+    const char *prefix;
-     bool failed;
-+    struct shader_limits
-+    {
-+        unsigned int input_count;
-+        unsigned int output_count;
-+    } limits;
-+    bool interstage_input;
-+    bool interstage_output;
-+    const struct vkd3d_shader_interface_info *interface_info;
-+    const struct vkd3d_shader_descriptor_offset_info *offset_info;
-+    const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info;
-+    const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info;
- };
-+static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_src_param *rel_addr, unsigned int offset);
- static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error(
-         struct vkd3d_glsl_generator *generator,
-         enum vkd3d_shader_error error, const char *fmt, ...)
-@@ -53,11 +83,110 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error(
-     generator->failed = true;
- }
-+static const char *shader_glsl_get_prefix(enum vkd3d_shader_type type)
-+    switch (type)
-+    {
-+        case VKD3D_SHADER_TYPE_VERTEX:
-+            return "vs";
-+        case VKD3D_SHADER_TYPE_HULL:
-+            return "hs";
-+        case VKD3D_SHADER_TYPE_DOMAIN:
-+            return "ds";
-+            return "gs";
-+        case VKD3D_SHADER_TYPE_PIXEL:
-+            return "ps";
-+            return "cs";
-+        default:
-+            return NULL;
-+    }
-+static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info(enum vkd3d_shader_resource_type t)
-+    static const struct glsl_resource_type_info info[] =
-+    {
-+        {0, 0, 0, "None"},      /* VKD3D_SHADER_RESOURCE_NONE */
-+        {1, 0, 0, "Buffer"},    /* VKD3D_SHADER_RESOURCE_BUFFER */
-+        {1, 0, 1, "1D"},        /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */
-+        {2, 0, 1, "2D"},        /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */
-+        {2, 0, 0, "2DMS"},      /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */
-+        {3, 0, 0, "3D"},        /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */
-+        {3, 0, 1, "Cube"},      /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */
-+        {2, 1, 1, "1DArray"},   /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */
-+        {3, 1, 1, "2DArray"},   /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */
-+        {3, 1, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */
-+        {4, 1, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */
-+    };
-+    if (!t || t >= ARRAY_SIZE(info))
-+        return NULL;
-+    return &info[t];
-+static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(struct vkd3d_glsl_generator *gen,
-+        enum vkd3d_shader_descriptor_type type, unsigned int idx, unsigned int space)
-+    const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info;
-+    for (unsigned int i = 0; i < info->descriptor_count; ++i)
-+    {
-+        const struct vkd3d_shader_descriptor_info1 *d = &info->descriptors[i];
-+        if (d->type == type && d->register_space == space && d->register_index == idx)
-+            return d;
-+    }
-+    return NULL;
-+static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor_by_id(
-+        struct vkd3d_glsl_generator *gen, enum vkd3d_shader_descriptor_type type, unsigned int id)
-+    const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info;
-+    for (unsigned int i = 0; i < info->descriptor_count; ++i)
-+    {
-+        const struct vkd3d_shader_descriptor_info1 *d = &info->descriptors[i];
-+        if (d->type == type && d->register_id == id)
-+            return d;
-+    }
-+    return NULL;
- static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent)
- {
-     vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, "");
- }
-+static void shader_glsl_print_combined_sampler_name(struct vkd3d_string_buffer *buffer,
-+        struct vkd3d_glsl_generator *gen, unsigned int resource_index,
-+        unsigned int resource_space, unsigned int sampler_index, unsigned int sampler_space)
-+    vkd3d_string_buffer_printf(buffer, "%s_t_%u", gen->prefix, resource_index);
-+    if (resource_space)
-+        vkd3d_string_buffer_printf(buffer, "_%u", resource_space);
-+    if (sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX)
-+    {
-+        vkd3d_string_buffer_printf(buffer, "_s_%u", sampler_index);
-+        if (sampler_space)
-+            vkd3d_string_buffer_printf(buffer, "_%u", sampler_space);
-+    }
-+static void shader_glsl_print_image_name(struct vkd3d_string_buffer *buffer,
-+        struct vkd3d_glsl_generator *gen, unsigned int idx, unsigned int space)
-+    vkd3d_string_buffer_printf(buffer, "%s_image_%u", gen->prefix, idx);
-+    if (space)
-+        vkd3d_string_buffer_printf(buffer, "_%u", space);
- static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer,
-         struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg)
- {
-@@ -67,6 +196,99 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer,
-             vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset);
-             break;
-+        case VKD3DSPR_INPUT:
-+            if (reg->idx_count != 1)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled input register index count %u.", reg->idx_count);
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            if (reg->idx[0].rel_addr)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled input register indirect addressing.");
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, reg->idx[0].offset);
-+            break;
-+        case VKD3DSPR_OUTPUT:
-+            if (reg->idx_count != 1)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled output register index count %u.", reg->idx_count);
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            if (reg->idx[0].rel_addr)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled output register indirect addressing.");
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "%s_out[%u]", gen->prefix, reg->idx[0].offset);
-+            break;
-+        case VKD3DSPR_DEPTHOUT:
-+            if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled depth output in shader type #%x.",
-+                        gen->program->shader_version.type);
-+            vkd3d_string_buffer_printf(buffer, "gl_FragDepth");
-+            break;
-+        case VKD3DSPR_IMMCONST:
-+            switch (reg->dimension)
-+            {
-+                case VSIR_DIMENSION_SCALAR:
-+                    vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]);
-+                    break;
-+                case VSIR_DIMENSION_VEC4:
-+                    vkd3d_string_buffer_printf(buffer, "uvec4(%#xu, %#xu, %#xu, %#xu)",
-+                            reg->u.immconst_u32[0], reg->u.immconst_u32[1],
-+                            reg->u.immconst_u32[2], reg->u.immconst_u32[3]);
-+                    break;
-+                default:
-+                    vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension);
-+                    vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                            "Internal compiler error: Unhandled dimension %#x.", reg->dimension);
-+                    break;
-+            }
-+            break;
-+        case VKD3DSPR_CONSTBUFFER:
-+            if (reg->idx_count != 3)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count);
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            if (reg->idx[0].rel_addr || reg->idx[2].rel_addr)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled constant buffer register indirect addressing.");
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]",
-+                    gen->prefix, reg->idx[0].offset, reg->idx[2].offset);
-+            break;
-+        case VKD3DSPR_THREADID:
-+            vkd3d_string_buffer_printf(buffer, "gl_GlobalInvocationID");
-+            break;
-+        case VKD3DSPR_IDXTEMP:
-+            vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset);
-+            shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset);
-+            break;
-         default:
-             vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-                     "Internal compiler error: Unhandled register type %#x.", reg->type);
-@@ -106,23 +328,118 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca
-     vkd3d_string_buffer_release(cache, src->str);
- }
--static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen,
--        const struct vkd3d_shader_src_param *vsir_src, uint32_t mask)
-+static void shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vkd3d_glsl_generator *gen,
-+        const char *src, enum vkd3d_data_type dst_data_type, enum vkd3d_data_type src_data_type, unsigned int size)
-+    if (dst_data_type == VKD3D_DATA_UNORM || dst_data_type == VKD3D_DATA_SNORM)
-+        dst_data_type = VKD3D_DATA_FLOAT;
-+    if (src_data_type == VKD3D_DATA_UNORM || src_data_type == VKD3D_DATA_SNORM)
-+        src_data_type = VKD3D_DATA_FLOAT;
-+    if (dst_data_type == src_data_type)
-+    {
-+        vkd3d_string_buffer_printf(dst, "%s", src);
-+        return;
-+    }
-+    if (src_data_type == VKD3D_DATA_FLOAT)
-+    {
-+        switch (dst_data_type)
-+        {
-+            case VKD3D_DATA_INT:
-+                vkd3d_string_buffer_printf(dst, "floatBitsToInt(%s)", src);
-+                return;
-+            case VKD3D_DATA_UINT:
-+                vkd3d_string_buffer_printf(dst, "floatBitsToUint(%s)", src);
-+                return;
-+            default:
-+                break;
-+        }
-+    }
-+    if (src_data_type == VKD3D_DATA_UINT)
-+    {
-+        switch (dst_data_type)
-+        {
-+            case VKD3D_DATA_FLOAT:
-+                vkd3d_string_buffer_printf(dst, "uintBitsToFloat(%s)", src);
-+                return;
-+            case VKD3D_DATA_INT:
-+                if (size == 1)
-+                    vkd3d_string_buffer_printf(dst, "int(%s)", src);
-+                else
-+                    vkd3d_string_buffer_printf(dst, "ivec%u(%s)", size, src);
-+                return;
-+            default:
-+                break;
-+        }
-+    }
-+    vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+            "Internal compiler error: Unhandled bitcast from %#x to %#x.",
-+            src_data_type, dst_data_type);
-+    vkd3d_string_buffer_printf(dst, "%s", src);
-+static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, /* Prints source parameter vsir_src into `buffer`: register name, bit-cast to data_type, swizzle for vec4 registers, then the NEG/ABS modifier. */
-+        const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vkd3d_data_type data_type)
- {
-     const struct vkd3d_shader_register *reg = &vsir_src->reg;
-+    struct vkd3d_string_buffer *register_name, *str;
-+    enum vkd3d_data_type src_data_type;
-+    unsigned int size;
--    glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers);
-+    register_name = vkd3d_string_buffer_get(&gen->string_buffers);
-     if (reg->non_uniform)
-         vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-                 "Internal compiler error: Unhandled 'non-uniform' modifier.");
--    if (vsir_src->modifiers)
--        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
--                "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers);
--    shader_glsl_print_register_name(glsl_src->str, gen, reg);
-+    if (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_THREADID) /* Immediate constants and thread IDs are treated as UINT; every other register as FLOAT. */
-+        src_data_type = VKD3D_DATA_UINT;
-+    else
-+        src_data_type = VKD3D_DATA_FLOAT;
-+    shader_glsl_print_register_name(register_name, gen, reg);
-+    if (!vsir_src->modifiers)
-+        str = buffer;
-+    else
-+        str = vkd3d_string_buffer_get(&gen->string_buffers); /* With a modifier, build the operand in a scratch buffer so it can be wrapped below. */
-+    size = reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1;
-+    shader_glsl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, size);
-     if (reg->dimension == VSIR_DIMENSION_VEC4)
--        shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask);
-+        shader_glsl_print_swizzle(str, vsir_src->swizzle, mask);
-+    switch (vsir_src->modifiers)
-+    {
-+        case VKD3DSPSM_NONE:
-+            break;
-+        case VKD3DSPSM_NEG:
-+            vkd3d_string_buffer_printf(buffer, "-%s", str->buffer);
-+            break;
-+        case VKD3DSPSM_ABS:
-+            vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer);
-+            break;
-+        default: /* Any other modifier: emit a placeholder and report an internal error. */
-+            vkd3d_string_buffer_printf(buffer, "<unhandled modifier %#x>(%s)",
-+                    vsir_src->modifiers, str->buffer);
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers);
-+            break;
-+    }
-+    if (str != buffer) /* Release the scratch buffer only if one was taken. */
-+        vkd3d_string_buffer_release(&gen->string_buffers, str);
-+    vkd3d_string_buffer_release(&gen->string_buffers, register_name);
-+static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, /* Takes a string buffer from the generator's cache and prints vsir_src into it, using the register's own data type. */
-+        const struct vkd3d_shader_src_param *vsir_src, uint32_t mask)
-+    glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers);
-+    shader_glsl_print_src(glsl_src->str, gen, vsir_src, mask, vsir_src->reg.data_type);
- }
- static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache)
-@@ -153,26 +470,89 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener
-     return write_mask;
- }
--static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment(
--        struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...)
-+static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, /* Appends an array subscript to `buffer`: "[offset]" without rel_addr, otherwise "[<rel_addr>]" or "[<rel_addr> + offset]". */
-+        const struct vkd3d_shader_src_param *rel_addr, unsigned int offset)
- {
--    va_list args;
-+    struct glsl_src r;
-+    if (!rel_addr)
-+    {
-+        vkd3d_string_buffer_printf(buffer, "[%u]", offset);
-+        return;
-+    }
-+    glsl_src_init(&r, gen, rel_addr, VKD3DSP_WRITEMASK_0);
-+    vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer);
-+    if (offset)
-+        vkd3d_string_buffer_printf(buffer, " + %u", offset);
-+    vkd3d_string_buffer_printf(buffer, "]"); /* Close the bracket unconditionally; it was previously skipped whenever offset was non-zero. */
-+    glsl_src_cleanup(&r, &gen->string_buffers);
-+static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_glsl_generator *gen, /* Emits "<dst><mask> = <expr>;\n" into gen->buffer; <expr> is format/args, wrapped according to data_type and the saturate modifier. */
-+        struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, va_list args)
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    uint32_t modifiers = dst->vsir->modifiers;
-+    bool close = true;
-     if (dst->vsir->shift)
-         vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-                 "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift);
--    if (dst->vsir->modifiers)
-+    if (modifiers & ~VKD3DSPDM_SATURATE) /* Only the saturate modifier is handled; any other bit is reported. */
-         vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
--                "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers);
-+                "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers);
--    shader_glsl_print_indent(gen->buffer, gen->indent);
--    vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer);
-+    shader_glsl_print_indent(buffer, gen->indent);
-+    vkd3d_string_buffer_printf(buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer);
-+    if (modifiers & VKD3DSPDM_SATURATE)
-+        vkd3d_string_buffer_printf(buffer, "clamp(");
-+    switch (data_type)
-+    {
-+        default:
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled destination register data type %#x.", data_type);
-+            /* fall through */
-+        case VKD3D_DATA_FLOAT:
-+            close = false; /* FLOAT gets no wrapper call, so there is no parenthesis to close. */
-+            break;
-+        case VKD3D_DATA_INT:
-+            vkd3d_string_buffer_printf(buffer, "intBitsToFloat(");
-+            break;
-+        case VKD3D_DATA_UINT:
-+            vkd3d_string_buffer_printf(buffer, "uintBitsToFloat(");
-+            break;
-+    }
-+    vkd3d_string_buffer_vprintf(buffer, format, args);
-+    if (close)
-+        vkd3d_string_buffer_printf(buffer, ")");
-+    if (modifiers & VKD3DSPDM_SATURATE)
-+        vkd3d_string_buffer_printf(buffer, ", 0.0, 1.0)"); /* Completes the clamp(<expr>, 0.0, 1.0) opened above. */
-+    vkd3d_string_buffer_printf(buffer, ";\n");
-+static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( /* Variadic wrapper around shader_glsl_vprint_assignment() that uses the destination register's own data type. */
-+        struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...)
-+    va_list args;
-     va_start(args, format);
--    vkd3d_string_buffer_vprintf(gen->buffer, format, args);
-+    shader_glsl_vprint_assignment(gen, dst, dst->vsir->reg.data_type, format, args);
-     va_end(args);
-+static void VKD3D_PRINTF_FUNC(4, 5) shader_glsl_print_assignment_ext(struct vkd3d_glsl_generator *gen, /* Variadic wrapper around shader_glsl_vprint_assignment() where the caller supplies data_type explicitly. */
-+        struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, ...)
-+    va_list args;
--    vkd3d_string_buffer_printf(gen->buffer, ";\n");
-+    va_start(args, format);
-+    shader_glsl_vprint_assignment(gen, dst, data_type, format, args);
-+    va_end(args);
- }
- static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-@@ -183,138 +563,1923 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct
-             "Internal compiler error: Unhandled instruction %#x.", ins->opcode);
- }
--static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+static void shader_glsl_binop(struct vkd3d_glsl_generator *gen, /* Emits "dst = src0 <op> src1;" for a two-source instruction; both sources are read with the destination write mask. */
-+        const struct vkd3d_shader_instruction *ins, const char *op)
- {
--    struct glsl_src src;
-+    struct glsl_src src[2];
-     struct glsl_dst dst;
-     uint32_t mask;
-     mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
--    glsl_src_init(&src, gen, &ins->src[0], mask);
-+    glsl_src_init(&src[0], gen, &ins->src[0], mask);
-+    glsl_src_init(&src[1], gen, &ins->src[1], mask);
--    shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer);
-+    shader_glsl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer);
--    glsl_src_cleanup(&src, &gen->string_buffers);
-+    glsl_src_cleanup(&src[1], &gen->string_buffers);
-+    glsl_src_cleanup(&src[0], &gen->string_buffers);
-     glsl_dst_cleanup(&dst, &gen->string_buffers);
- }
--static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+static void shader_glsl_dot(struct vkd3d_glsl_generator *gen, /* Emits dst = dot(src0, src1), reading both sources with src_mask; the scalar result is widened with vecN() when dst has more than one component. */
-+        const struct vkd3d_shader_instruction *ins, uint32_t src_mask)
- {
--    const struct vkd3d_shader_version *version = &gen->program->shader_version;
-+    unsigned int component_count;
-+    struct glsl_src src[2];
-+    struct glsl_dst dst;
-+    uint32_t dst_mask;
--    /*
--    * TODO: Implement in_subroutine
--    * TODO: shader_glsl_generate_shader_epilogue(generator);
--    */
--    if (version->major >= 4)
--    {
--        shader_glsl_print_indent(gen->buffer, gen->indent);
--        vkd3d_string_buffer_printf(gen->buffer, "return;\n");
--    }
-+    dst_mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&src[0], gen, &ins->src[0], src_mask);
-+    glsl_src_init(&src[1], gen, &ins->src[1], src_mask);
-+    if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1)
-+        shader_glsl_print_assignment(gen, &dst, "vec%u(dot(%s, %s))", /* %u matches the unsigned component_count. */
-+                component_count, src[0].str->buffer, src[1].str->buffer);
-+    else
-+        shader_glsl_print_assignment(gen, &dst, "dot(%s, %s)",
-+                src[0].str->buffer, src[1].str->buffer);
-+    glsl_src_cleanup(&src[1], &gen->string_buffers);
-+    glsl_src_cleanup(&src[0], &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
- }
--static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
--        const struct vkd3d_shader_instruction *ins)
-+static void shader_glsl_intrinsic(struct vkd3d_glsl_generator *gen, /* Emits dst = op(src0, src1, ...), passing every source of the instruction as an argument. */
-+        const struct vkd3d_shader_instruction *ins, const char *op)
- {
--    gen->location = ins->location;
-+    struct vkd3d_string_buffer *args;
-+    struct glsl_src src;
-+    struct glsl_dst dst;
-+    unsigned int i;
-+    uint32_t mask;
--    switch (ins->opcode)
-+    mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    args = vkd3d_string_buffer_get(&gen->string_buffers);
-+    for (i = 0; i < ins->src_count; ++i)
-     {
--        case VKD3DSIH_DCL_INPUT:
--        case VKD3DSIH_DCL_OUTPUT:
--        case VKD3DSIH_DCL_OUTPUT_SIV:
--        case VKD3DSIH_NOP:
--            break;
--        case VKD3DSIH_MOV:
--            shader_glsl_mov(gen, ins);
--            break;
--        case VKD3DSIH_RET:
--            shader_glsl_ret(gen, ins);
--            break;
--        default:
--            shader_glsl_unhandled(gen, ins);
--            break;
-+        glsl_src_init(&src, gen, &ins->src[i], mask);
-+        vkd3d_string_buffer_printf(args, "%s%s", i ? ", " : "", src.str->buffer); /* Comma-separate every argument after the first. */
-+        glsl_src_cleanup(&src, &gen->string_buffers);
-     }
-+    shader_glsl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer);
-+    vkd3d_string_buffer_release(&gen->string_buffers, args);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
- }
--static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen)
-+static void shader_glsl_relop(struct vkd3d_glsl_generator *gen, /* Emits a comparison whose result is 0xffffffffu where true and 0u where false; vector_op is used for multi-component masks, scalar_op otherwise. */
-+        const struct vkd3d_shader_instruction *ins, const char *scalar_op, const char *vector_op)
- {
--    const struct vsir_program *program = gen->program;
--    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    unsigned int mask_size;
-+    struct glsl_src src[2];
-+    struct glsl_dst dst;
-+    uint32_t mask;
--    if (program->temp_count)
--        vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count);
-+    mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&src[0], gen, &ins->src[0], mask);
-+    glsl_src_init(&src[1], gen, &ins->src[1], mask);
-+    if ((mask_size = vsir_write_mask_component_count(mask)) > 1)
-+        shader_glsl_print_assignment(gen, &dst, "uvec%u(%s(%s, %s)) * 0xffffffffu", /* Vector form: convert the vector_op result to uvecN and scale it to all-ones. */
-+                mask_size, vector_op, src[0].str->buffer, src[1].str->buffer);
-+    else
-+        shader_glsl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u",
-+                src[0].str->buffer, scalar_op, src[1].str->buffer);
-+    glsl_src_cleanup(&src[1], &gen->string_buffers);
-+    glsl_src_cleanup(&src[0], &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
- }
--static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out)
-+static void shader_glsl_cast(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins, /* Emits dst = <constructor>(src0): scalar_constructor for one component, vector_constructor followed by the component count otherwise. */
-+        const char *scalar_constructor, const char *vector_constructor)
- {
--    const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions;
--    struct vkd3d_string_buffer *buffer = gen->buffer;
--    unsigned int i;
--    void *code;
--    MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
-+    unsigned int component_count;
-+    struct glsl_src src;
-+    struct glsl_dst dst;
-+    uint32_t mask;
--    vkd3d_string_buffer_printf(buffer, "#version 440\n\n");
-+    mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&src, gen, &ins->src[0], mask);
--    vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL));
-+    if ((component_count = vsir_write_mask_component_count(mask)) > 1)
-+        shader_glsl_print_assignment(gen, &dst, "%s%u(%s)",
-+                vector_constructor, component_count, src.str->buffer);
-+    else
-+        shader_glsl_print_assignment(gen, &dst, "%s(%s)",
-+                scalar_constructor, src.str->buffer);
--    shader_glsl_generate_declarations(gen);
-+    glsl_src_cleanup(&src, &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
--    vkd3d_string_buffer_printf(buffer, "void main()\n{\n");
-+static void shader_glsl_end_block(struct vkd3d_glsl_generator *gen) /* Decrements the indent level, then emits the closing "}" at the new level. */
-+    --gen->indent;
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "}\n");
-+static void shader_glsl_begin_block(struct vkd3d_glsl_generator *gen) /* Emits "{" at the current indent level, then increments the indent level. */
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "{\n");
-     ++gen->indent;
--    for (i = 0; i < instructions->count; ++i)
--    {
--        vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]);
--    }
--    vkd3d_string_buffer_printf(buffer, "}\n");
-+static void shader_glsl_if(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits "if (...)" on the first component of src[0] and opens a block. */
-+    const char *condition;
-+    struct glsl_src src;
--    if (TRACE_ON())
--        vkd3d_string_buffer_trace(buffer);
-+    glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0);
--    if (gen->failed)
--        return VKD3D_ERROR_INVALID_SHADER;
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? "bool" : "!bool"; /* NZ tests bool(x); any other flag value tests !bool(x). */
-+    vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer);
--    if ((code = vkd3d_malloc(buffer->buffer_size)))
--    {
--        memcpy(code, buffer->buffer, buffer->content_size);
--        out->size = buffer->content_size;
--        out->code = code;
--    }
--    else return VKD3D_ERROR_OUT_OF_MEMORY;
-+    glsl_src_cleanup(&src, &gen->string_buffers);
--    return VKD3D_OK;
-+    shader_glsl_begin_block(gen);
- }
--static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen)
-+static void shader_glsl_else(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Closes the current block, emits "else", and opens a new block. */
- {
--    vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer);
--    vkd3d_string_buffer_cache_cleanup(&gen->string_buffers);
-+    shader_glsl_end_block(gen);
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "else\n");
-+    shader_glsl_begin_block(gen);
- }
--static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen,
--        struct vsir_program *program, struct vkd3d_shader_message_context *message_context)
-+static void shader_glsl_loop(struct vkd3d_glsl_generator *gen) /* Emits an unconditional "for (;;)" loop header and opens its block. */
- {
--    memset(gen, 0, sizeof(*gen));
--    gen->program = program;
--    vkd3d_string_buffer_cache_init(&gen->string_buffers);
--    gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers);
--    gen->message_context = message_context;
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "for (;;)\n");
-+    shader_glsl_begin_block(gen);
- }
--int glsl_compile(struct vsir_program *program, uint64_t config_flags,
--        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
--        struct vkd3d_shader_message_context *message_context)
-+static void shader_glsl_break(struct vkd3d_glsl_generator *gen) /* Emits an indented "break;" statement. */
- {
--    struct vkd3d_glsl_generator generator;
--    int ret;
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "break;\n");
--    if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0)
--        return ret;
-+static void shader_glsl_continue(struct vkd3d_glsl_generator *gen) /* Emits an indented "continue;" statement. */
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "continue;\n");
-+static void shader_glsl_switch(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits "switch (<src0>)" on the first component of src[0] and opens its block. */
-+    struct glsl_src src;
-+    glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0);
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "switch (%s)\n", src.str->buffer);
-+    shader_glsl_begin_block(gen);
-+    glsl_src_cleanup(&src, &gen->string_buffers);
-+static void shader_glsl_case(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits a "case <src0>:" label using the first component of src[0]; no block is opened. */
-+    struct glsl_src src;
-+    glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0);
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "case %s:\n", src.str->buffer);
-+    glsl_src_cleanup(&src, &gen->string_buffers);
-+static void shader_glsl_default(struct vkd3d_glsl_generator *gen) /* Emits an indented "default:" label. */
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "default:\n");
-+static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, /* Prints a constant texel offset as a scalar, ivec2 or ivec3 literal according to offset_size. */
-+        unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset)
-+    switch (offset_size)
-+    {
-+        case 1:
-+            vkd3d_string_buffer_printf(buffer, "%d", offset->u);
-+            break;
-+        case 2:
-+            vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v);
-+            break;
-+        default: /* Any other size is reported, then printed using the three-component form below. */
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Invalid texel offset size %u.", offset_size);
-+            /* fall through */
-+        case 3:
-+            vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", offset->u, offset->v, offset->w);
-+            break;
-+    }
-+static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits a texelFetch() of the SRV named by src[1] at the coordinate in src[0], and assigns it to dst[0]. */
-+    unsigned int resource_id, resource_idx, resource_space, sample_count;
-+    const struct glsl_resource_type_info *resource_type_info;
-+    const struct vkd3d_shader_descriptor_info1 *d;
-+    enum vkd3d_shader_component_type sampled_type;
-+    enum vkd3d_shader_resource_type resource_type;
-+    struct vkd3d_string_buffer *fetch;
-+    enum vkd3d_data_type data_type;
-+    struct glsl_src coord;
-+    struct glsl_dst dst;
-+    uint32_t coord_mask;
-+    if (vkd3d_shader_instruction_has_texel_offset(ins))
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled texel fetch offset.");
-+    if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr)
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                "Descriptor indexing is not supported.");
-+    resource_id = ins->src[1].reg.idx[0].offset;
-+    resource_idx = ins->src[1].reg.idx[1].offset;
-+    if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id)))
-+    {
-+        resource_type = d->resource_type;
-+        resource_space = d->register_space;
-+        sample_count = d->sample_count;
-+        sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type);
-+        data_type = vkd3d_data_type_from_component_type(sampled_type);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, /* Undeclared descriptor: report it and continue with 2D, single-sample, float defaults. */
-+                "Internal compiler error: Undeclared resource descriptor %u.", resource_id);
-+        resource_space = 0;
-+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-+        sample_count = 1;
-+        data_type = VKD3D_DATA_FLOAT;
-+    }
-+    if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
-+    {
-+        coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, /* Unknown resource type: report it and assume two coordinate components. */
-+                "Internal compiler error: Unhandled resource type %#x.", resource_type);
-+        coord_mask = vkd3d_write_mask_from_component_count(2);
-+    }
-+    glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&coord, gen, &ins->src[0], coord_mask);
-+    fetch = vkd3d_string_buffer_get(&gen->string_buffers);
-+    vkd3d_string_buffer_printf(fetch, "texelFetch(");
-+    shader_glsl_print_combined_sampler_name(fetch, gen, resource_idx,
-+            resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0);
-+    vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer);
-+    if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER) /* Non-buffer resources get a third argument. */
-+    {
-+        vkd3d_string_buffer_printf(fetch, ", ");
-+        if (ins->opcode != VKD3DSIH_LD2DMS)
-+            shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type); /* Taken from src[0] under VKD3DSP_WRITEMASK_3 (presumably the .w component holding the lod; confirm). */
-+        else if (sample_count == 1)
-+            /* If the resource isn't a true multisample resource, this is the
-+             * "lod" parameter instead of the "sample" parameter. */
-+            vkd3d_string_buffer_printf(fetch, "0");
-+        else
-+            shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type);
-+    }
-+    vkd3d_string_buffer_printf(fetch, ")");
-+    shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask);
-+    shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer);
-+    vkd3d_string_buffer_release(&gen->string_buffers, fetch);
-+    glsl_src_cleanup(&coord, &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
-+static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, /* Prints the coordinate argument(s) of a shadow lookup, combining `coord` with the reference value `ref`. */
-+        const struct vkd3d_shader_src_param *coord, const struct vkd3d_shader_src_param *ref, unsigned int coord_size)
-+    uint32_t coord_mask = vkd3d_write_mask_from_component_count(coord_size);
-+    switch (coord_size)
-+    {
-+        case 1: /* One component: vec3(coord, 0.0, ref). */
-+            vkd3d_string_buffer_printf(buffer, "vec3(");
-+            shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type);
-+            vkd3d_string_buffer_printf(buffer, ", 0.0, ");
-+            shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type);
-+            vkd3d_string_buffer_printf(buffer, ")");
-+            break;
-+        case 4: /* Four components: coord and ref are emitted as two separate arguments. */
-+            shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type);
-+            vkd3d_string_buffer_printf(buffer, ", ");
-+            shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type);
-+            break;
-+        default: /* Otherwise: vec<coord_size + 1>(coord, ref). */
-+            vkd3d_string_buffer_printf(buffer, "vec%u(", coord_size + 1);
-+            shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type);
-+            vkd3d_string_buffer_printf(buffer, ", ");
-+            shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type);
-+            vkd3d_string_buffer_printf(buffer, ")");
-+            break;
-+    }
-+static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits a texture*()/textureGather*() call for the sample and gather4 opcodes and assigns the result to dst[0]. */
-+    bool shadow_sampler, array, bias, dynamic_offset, gather, grad, lod, lod_zero, offset, shadow;
-+    const struct glsl_resource_type_info *resource_type_info;
-+    const struct vkd3d_shader_src_param *resource, *sampler;
-+    unsigned int resource_id, resource_idx, resource_space;
-+    unsigned int sampler_id, sampler_idx, sampler_space;
-+    const struct vkd3d_shader_descriptor_info1 *d;
-+    enum vkd3d_shader_component_type sampled_type;
-+    enum vkd3d_shader_resource_type resource_type;
-+    unsigned int component_idx, coord_size;
-+    struct vkd3d_string_buffer *sample;
-+    enum vkd3d_data_type data_type;
-+    struct glsl_dst dst;
-+    bias = ins->opcode == VKD3DSIH_SAMPLE_B;
-+    dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO;
-+    gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO;
-+    grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD;
-+    lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
-+    lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
-+    offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins);
-+    shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
-+    resource = &ins->src[1 + dynamic_offset]; /* GATHER4_PO carries its offset in src[1], shifting resource and sampler by one. */
-+    sampler = &ins->src[2 + dynamic_offset];
-+    if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr
-+            || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr)
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                "Descriptor indexing is not supported.");
-+    resource_id = resource->reg.idx[0].offset;
-+    resource_idx = resource->reg.idx[1].offset;
-+    if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id)))
-+    {
-+        resource_type = d->resource_type;
-+        resource_space = d->register_space;
-+        sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type);
-+        data_type = vkd3d_data_type_from_component_type(sampled_type);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, /* Undeclared descriptor: report it and continue with 2D float defaults. */
-+                "Internal compiler error: Undeclared resource descriptor %u.", resource_id);
-+        resource_space = 0;
-+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-+        data_type = VKD3D_DATA_FLOAT;
-+    }
-+    if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
-+    {
-+        coord_size = resource_type_info->coord_size;
-+        array = resource_type_info->array;
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled resource type %#x.", resource_type);
-+        coord_size = 2;
-+        array = false;
-+    }
-+    sampler_id = sampler->reg.idx[0].offset;
-+    sampler_idx = sampler->reg.idx[1].offset;
-+    if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id)))
-+    {
-+        sampler_space = d->register_space;
-+        shadow_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; /* Must be set before the checks below; it was previously read uninitialised. */
-+        if (shadow)
-+        {
-+            if (!shadow_sampler)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id);
-+        }
-+        else
-+        {
-+            if (shadow_sampler)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id);
-+        }
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Undeclared sampler descriptor %u.", sampler_id);
-+        sampler_space = 0;
-+    }
-+    glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    sample = vkd3d_string_buffer_get(&gen->string_buffers);
-+    if (gather) /* Function name: base name chosen here, "Offset" suffix appended below when an offset is present. */
-+        vkd3d_string_buffer_printf(sample, "textureGather");
-+    else if (grad)
-+        vkd3d_string_buffer_printf(sample, "textureGrad");
-+    else if (lod)
-+        vkd3d_string_buffer_printf(sample, "textureLod");
-+    else
-+        vkd3d_string_buffer_printf(sample, "texture");
-+    vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : "");
-+    shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space);
-+    vkd3d_string_buffer_printf(sample, ", ");
-+    if (shadow)
-+        shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size);
-+    else
-+        shader_glsl_print_src(sample, gen, &ins->src[0],
-+                vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type);
-+    if (grad)
-+    {
-+        vkd3d_string_buffer_printf(sample, ", ");
-+        shader_glsl_print_src(sample, gen, &ins->src[3],
-+                vkd3d_write_mask_from_component_count(coord_size - array), ins->src[3].reg.data_type);
-+        vkd3d_string_buffer_printf(sample, ", ");
-+        shader_glsl_print_src(sample, gen, &ins->src[4],
-+                vkd3d_write_mask_from_component_count(coord_size - array), ins->src[4].reg.data_type);
-+    }
-+    else if (lod_zero)
-+    {
-+        vkd3d_string_buffer_printf(sample, ", 0.0");
-+    }
-+    else if (lod)
-+    {
-+        vkd3d_string_buffer_printf(sample, ", ");
-+        shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
-+    }
-+    if (offset)
-+    {
-+        vkd3d_string_buffer_printf(sample, ", ");
-+        if (dynamic_offset)
-+            shader_glsl_print_src(sample, gen, &ins->src[1],
-+                    vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type);
-+        else
-+            shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset);
-+    }
-+    if (bias)
-+    {
-+        vkd3d_string_buffer_printf(sample, ", ");
-+        shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
-+    }
-+    else if (gather)
-+    {
-+        if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 0))) /* The component argument is omitted when it is 0. */
-+            vkd3d_string_buffer_printf(sample, ", %u", component_idx);
-+    }
-+    vkd3d_string_buffer_printf(sample, ")");
-+    shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask);
-+    shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer);
-+    vkd3d_string_buffer_release(&gen->string_buffers, sample);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
-+static void shader_glsl_load_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits an imageLoad() from the UAV named by src[1] at the coordinate in src[0], and assigns it to dst[0]. */
-+    const struct glsl_resource_type_info *resource_type_info;
-+    enum vkd3d_shader_component_type component_type;
-+    const struct vkd3d_shader_descriptor_info1 *d;
-+    enum vkd3d_shader_resource_type resource_type;
-+    unsigned int uav_id, uav_idx, uav_space;
-+    struct vkd3d_string_buffer *load;
-+    struct glsl_src coord;
-+    struct glsl_dst dst;
-+    uint32_t coord_mask;
-+    if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr)
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                "Descriptor indexing is not supported.");
-+    uav_id = ins->src[1].reg.idx[0].offset;
-+    uav_idx = ins->src[1].reg.idx[1].offset;
-+    if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id)))
-+    {
-+        resource_type = d->resource_type;
-+        uav_space = d->register_space;
-+        component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, /* Undeclared descriptor: report it and continue with 2D float defaults. */
-+                "Internal compiler error: Undeclared UAV descriptor %u.", uav_id);
-+        uav_space = 0;
-+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-+        component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+    }
-+    if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
-+    {
-+        coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, /* Unknown resource type: report it and assume two coordinate components. */
-+                "Internal compiler error: Unhandled UAV type %#x.", resource_type);
-+        coord_mask = vkd3d_write_mask_from_component_count(2);
-+    }
-+    glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&coord, gen, &ins->src[0], coord_mask);
-+    load = vkd3d_string_buffer_get(&gen->string_buffers);
-+    vkd3d_string_buffer_printf(load, "imageLoad(");
-+    shader_glsl_print_image_name(load, gen, uav_idx, uav_space);
-+    vkd3d_string_buffer_printf(load, ", %s)", coord.str->buffer);
-+    shader_glsl_print_swizzle(load, ins->src[1].swizzle, ins->dst[0].write_mask);
-+    shader_glsl_print_assignment_ext(gen, &dst,
-+            vkd3d_data_type_from_component_type(component_type), "%s", load->buffer);
-+    vkd3d_string_buffer_release(&gen->string_buffers, load);
-+    glsl_src_cleanup(&coord, &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
-+static void shader_glsl_store_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    const struct glsl_resource_type_info *resource_type_info;
-+    enum vkd3d_shader_component_type component_type;
-+    const struct vkd3d_shader_descriptor_info1 *d;
-+    enum vkd3d_shader_resource_type resource_type;
-+    unsigned int uav_id, uav_idx, uav_space;
-+    struct vkd3d_string_buffer *image_data;
-+    struct glsl_src image_coord;
-+    uint32_t coord_mask;
-+    if (ins->dst[0].reg.idx[0].rel_addr || ins->dst[0].reg.idx[1].rel_addr)
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                "Descriptor indexing is not supported.");
-+    uav_id = ins->dst[0].reg.idx[0].offset;
-+    uav_idx = ins->dst[0].reg.idx[1].offset;
-+    if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id)))
-+    {
-+        resource_type = d->resource_type;
-+        uav_space = d->register_space;
-+        component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Undeclared UAV descriptor %u.", uav_id);
-+        uav_space = 0;
-+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-+        component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+    }
-+    if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
-+    {
-+        coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled UAV type %#x.", resource_type);
-+        coord_mask = vkd3d_write_mask_from_component_count(2);
-+    }
-+    glsl_src_init(&image_coord, gen, &ins->src[0], coord_mask);
-+    image_data = vkd3d_string_buffer_get(&gen->string_buffers);
-+    if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR)
-+    {
-+        switch (component_type)
-+        {
-+            case VKD3D_SHADER_COMPONENT_UINT:
-+                vkd3d_string_buffer_printf(image_data, "uvec4(");
-+                break;
-+            case VKD3D_SHADER_COMPONENT_INT:
-+                vkd3d_string_buffer_printf(image_data, "ivec4(");
-+                break;
-+            default:
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled component type %#x.", component_type);
-+                /* fall through */
-+            case VKD3D_SHADER_COMPONENT_FLOAT:
-+                vkd3d_string_buffer_printf(image_data, "vec4(");
-+                break;
-+        }
-+    }
-+    shader_glsl_print_src(image_data, gen, &ins->src[1], VKD3DSP_WRITEMASK_ALL,
-+            vkd3d_data_type_from_component_type(component_type));
-+    if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR)
-+        vkd3d_string_buffer_printf(image_data, ", 0, 0, 0)");
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "imageStore(");
-+    shader_glsl_print_image_name(gen->buffer, gen, uav_idx, uav_space);
-+    vkd3d_string_buffer_printf(gen->buffer, ", %s, %s);\n", image_coord.str->buffer, image_data->buffer);
-+    vkd3d_string_buffer_release(&gen->string_buffers, image_data);
-+    glsl_src_cleanup(&image_coord, &gen->string_buffers);
-+static void shader_glsl_unary_op(struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_instruction *ins, const char *op)
-+    struct glsl_src src;
-+    struct glsl_dst dst;
-+    uint32_t mask;
-+    mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&src, gen, &ins->src[0], mask);
-+    shader_glsl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer);
-+    glsl_src_cleanup(&src, &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
-+static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    struct glsl_src src;
-+    struct glsl_dst dst;
-+    uint32_t mask;
-+    mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&src, gen, &ins->src[0], mask);
-+    shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer);
-+    glsl_src_cleanup(&src, &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
-+static void shader_glsl_movc(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    unsigned int component_count;
-+    struct glsl_src src[3];
-+    struct glsl_dst dst;
-+    uint32_t mask;
-+    mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    glsl_src_init(&src[0], gen, &ins->src[0], mask);
-+    glsl_src_init(&src[1], gen, &ins->src[1], mask);
-+    glsl_src_init(&src[2], gen, &ins->src[2], mask);
-+    if ((component_count = vsir_write_mask_component_count(mask)) > 1)
-+        shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bvec%u(%s))",
-+                src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer);
-+    else
-+        shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bool(%s))",
-+                src[2].str->buffer, src[1].str->buffer, src[0].str->buffer);
-+    glsl_src_cleanup(&src[2], &gen->string_buffers);
-+    glsl_src_cleanup(&src[1], &gen->string_buffers);
-+    glsl_src_cleanup(&src[0], &gen->string_buffers);
-+    glsl_dst_cleanup(&dst, &gen->string_buffers);
-+static void shader_glsl_mul_extended(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    struct glsl_src src[2];
-+    struct glsl_dst dst;
-+    uint32_t mask;
-+    if (ins->dst[0].reg.type != VKD3DSPR_NULL)
-+    {
-+        /* FIXME: imulExtended()/umulExtended() from ARB_gpu_shader5/GLSL 4.00+. */
-+        mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+        shader_glsl_print_assignment(gen, &dst, "<unhandled 64-bit multiplication>");
-+        glsl_dst_cleanup(&dst, &gen->string_buffers);
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled 64-bit integer multiplication.");
-+    }
-+    if (ins->dst[1].reg.type != VKD3DSPR_NULL)
-+    {
-+        mask = glsl_dst_init(&dst, gen, ins, &ins->dst[1]);
-+        glsl_src_init(&src[0], gen, &ins->src[0], mask);
-+        glsl_src_init(&src[1], gen, &ins->src[1], mask);
-+        shader_glsl_print_assignment(gen, &dst, "%s * %s", src[0].str->buffer, src[1].str->buffer);
-+        glsl_src_cleanup(&src[1], &gen->string_buffers);
-+        glsl_src_cleanup(&src[0], &gen->string_buffers);
-+        glsl_dst_cleanup(&dst, &gen->string_buffers);
-+    }
-+static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen,
-+        enum vkd3d_shader_sysval_semantic sysval, unsigned int idx)
-+    const struct vkd3d_shader_version *version = &gen->program->shader_version;
-+    switch (sysval)
-+    {
-+        case VKD3D_SHADER_SV_POSITION:
-+            if (version->type == VKD3D_SHADER_TYPE_COMPUTE)
-+            {
-+                vkd3d_string_buffer_printf(buffer, "<unhandled sysval %#x>", sysval);
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled SV_POSITION in shader type #%x.", version->type);
-+                break;
-+            }
-+            if (idx)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled SV_POSITION index %u.", idx);
-+            if (version->type == VKD3D_SHADER_TYPE_PIXEL)
-+                vkd3d_string_buffer_printf(buffer, "gl_FragCoord");
-+            else
-+                vkd3d_string_buffer_printf(buffer, "gl_Position");
-+            break;
-+        case VKD3D_SHADER_SV_VERTEX_ID:
-+            if (version->type != VKD3D_SHADER_TYPE_VERTEX)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled SV_VERTEX_ID in shader type #%x.", version->type);
-+            vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_VertexID, 0, 0, 0))");
-+            break;
-+            if (version->type != VKD3D_SHADER_TYPE_PIXEL)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type);
-+            vkd3d_string_buffer_printf(buffer,
-+                    "uintBitsToFloat(uvec4(gl_FrontFacing ? 0xffffffffu : 0u, 0u, 0u, 0u))");
-+            break;
-+            if (version->type != VKD3D_SHADER_TYPE_PIXEL)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type);
-+            vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))");
-+            break;
-+        case VKD3D_SHADER_SV_TARGET:
-+            if (version->type != VKD3D_SHADER_TYPE_PIXEL)
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled SV_TARGET in shader type #%x.", version->type);
-+            vkd3d_string_buffer_printf(buffer, "shader_out_%u", idx);
-+            break;
-+        default:
-+            vkd3d_string_buffer_printf(buffer, "<unhandled sysval %#x>", sysval);
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled system value %#x.", sysval);
-+            break;
-+    }
-+static void shader_glsl_shader_prologue(struct vkd3d_glsl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->input_signature;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i;
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+            continue;
-+        shader_glsl_print_indent(buffer, gen->indent);
-+        vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, e->register_index);
-+        shader_glsl_print_write_mask(buffer, e->mask);
-+        if (e->sysval_semantic == VKD3D_SHADER_SV_NONE)
-+        {
-+            if (gen->interstage_input)
-+            {
-+                vkd3d_string_buffer_printf(buffer, " = shader_in.reg_%u", e->target_location);
-+                if (e->target_location >= gen->limits.input_count)
-+                    vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                            "Internal compiler error: Input element %u specifies target location %u, "
-+                            "but only %u inputs are supported.",
-+                            i, e->target_location, gen->limits.input_count);
-+            }
-+            else
-+            {
-+                switch (e->component_type)
-+                {
-+                    case VKD3D_SHADER_COMPONENT_UINT:
-+                        vkd3d_string_buffer_printf(buffer, " = uintBitsToFloat(shader_in_%u)", i);
-+                        break;
-+                    case VKD3D_SHADER_COMPONENT_INT:
-+                        vkd3d_string_buffer_printf(buffer, " = intBitsToFloat(shader_in_%u)", i);
-+                        break;
-+                    default:
-+                        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                                "Internal compiler error: Unhandled input component type %#x.", e->component_type);
-+                        /* fall through */
-+                    case VKD3D_SHADER_COMPONENT_FLOAT:
-+                        vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i);
-+                        break;
-+                }
-+            }
-+        }
-+        else
-+        {
-+            vkd3d_string_buffer_printf(buffer, " = ");
-+            shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index);
-+        }
-+        shader_glsl_print_write_mask(buffer, e->mask);
-+        vkd3d_string_buffer_printf(buffer, ";\n");
-+    }
-+static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->output_signature;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    enum vkd3d_shader_component_type type;
-+    const struct signature_element *e;
-+    unsigned int i;
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+            continue;
-+        type = e->component_type;
-+        shader_glsl_print_indent(buffer, gen->indent);
-+        if (e->sysval_semantic == VKD3D_SHADER_SV_NONE)
-+        {
-+            if (gen->interstage_output)
-+            {
-+                type = VKD3D_SHADER_COMPONENT_FLOAT;
-+                vkd3d_string_buffer_printf(buffer, "shader_out.reg_%u", e->target_location);
-+                if (e->target_location >= gen->limits.output_count)
-+                    vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                            "Internal compiler error: Output element %u specifies target location %u, "
-+                            "but only %u outputs are supported.",
-+                            i, e->target_location, gen->limits.output_count);
-+            }
-+            else
-+            {
-+                vkd3d_string_buffer_printf(buffer, "<unhandled output %u>", e->target_location);
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled output.");
-+            }
-+        }
-+        else
-+        {
-+            shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index);
-+        }
-+        shader_glsl_print_write_mask(buffer, e->mask);
-+        switch (type)
-+        {
-+            case VKD3D_SHADER_COMPONENT_UINT:
-+                vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index);
-+                break;
-+            case VKD3D_SHADER_COMPONENT_INT:
-+                vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index);
-+                break;
-+            default:
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled output component type %#x.", e->component_type);
-+                /* fall through */
-+            case VKD3D_SHADER_COMPONENT_FLOAT:
-+                vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index);
-+                break;
-+        }
-+        shader_glsl_print_write_mask(buffer, e->mask);
-+        vkd3d_string_buffer_printf(buffer, ";\n");
-+    }
-+static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    const struct vkd3d_shader_version *version = &gen->program->shader_version;
-+    if (version->major >= 4)
-+    {
-+        shader_glsl_shader_epilogue(gen);
-+        shader_glsl_print_indent(gen->buffer, gen->indent);
-+        vkd3d_string_buffer_printf(gen->buffer, "return;\n");
-+    }
-+static void shader_glsl_dcl_indexable_temp(struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_instruction *ins)
-+    shader_glsl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "vec4 x%u[%u];\n",
-+            ins->declaration.indexable_temp.register_idx,
-+            ins->declaration.indexable_temp.register_size);
-+static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_instruction *ins)
-+    gen->location = ins->location;
-+    switch (ins->opcode)
-+    {
-+        case VKD3DSIH_ADD:
-+        case VKD3DSIH_IADD:
-+            shader_glsl_binop(gen, ins, "+");
-+            break;
-+        case VKD3DSIH_AND:
-+            shader_glsl_binop(gen, ins, "&");
-+            break;
-+        case VKD3DSIH_BREAK:
-+            shader_glsl_break(gen);
-+            break;
-+        case VKD3DSIH_CASE:
-+            shader_glsl_case(gen, ins);
-+            break;
-+        case VKD3DSIH_CONTINUE:
-+            shader_glsl_continue(gen);
-+            break;
-+            shader_glsl_dcl_indexable_temp(gen, ins);
-+            break;
-+        case VKD3DSIH_DCL_INPUT:
-+        case VKD3DSIH_DCL_INPUT_PS:
-+        case VKD3DSIH_DCL_INPUT_PS_SGV:
-+        case VKD3DSIH_DCL_INPUT_PS_SIV:
-+        case VKD3DSIH_DCL_INPUT_SGV:
-+        case VKD3DSIH_DCL_OUTPUT:
-+        case VKD3DSIH_DCL_OUTPUT_SIV:
-+        case VKD3DSIH_NOP:
-+            break;
-+        case VKD3DSIH_DEFAULT:
-+            shader_glsl_default(gen);
-+            break;
-+        case VKD3DSIH_DIV:
-+            shader_glsl_binop(gen, ins, "/");
-+            break;
-+        case VKD3DSIH_DP2:
-+            shader_glsl_dot(gen, ins, vkd3d_write_mask_from_component_count(2));
-+            break;
-+        case VKD3DSIH_DP3:
-+            shader_glsl_dot(gen, ins, vkd3d_write_mask_from_component_count(3));
-+            break;
-+        case VKD3DSIH_DP4:
-+            shader_glsl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL);
-+            break;
-+        case VKD3DSIH_ELSE:
-+            shader_glsl_else(gen, ins);
-+            break;
-+        case VKD3DSIH_ENDIF:
-+        case VKD3DSIH_ENDLOOP:
-+        case VKD3DSIH_ENDSWITCH:
-+            shader_glsl_end_block(gen);
-+            break;
-+        case VKD3DSIH_EQO:
-+        case VKD3DSIH_IEQ:
-+            shader_glsl_relop(gen, ins, "==", "equal");
-+            break;
-+        case VKD3DSIH_EXP:
-+            shader_glsl_intrinsic(gen, ins, "exp2");
-+            break;
-+        case VKD3DSIH_FRC:
-+            shader_glsl_intrinsic(gen, ins, "fract");
-+            break;
-+        case VKD3DSIH_FTOI:
-+            shader_glsl_cast(gen, ins, "int", "ivec");
-+            break;
-+        case VKD3DSIH_FTOU:
-+            shader_glsl_cast(gen, ins, "uint", "uvec");
-+            break;
-+        case VKD3DSIH_GATHER4:
-+        case VKD3DSIH_GATHER4_PO:
-+        case VKD3DSIH_SAMPLE:
-+        case VKD3DSIH_SAMPLE_B:
-+        case VKD3DSIH_SAMPLE_C:
-+        case VKD3DSIH_SAMPLE_C_LZ:
-+        case VKD3DSIH_SAMPLE_GRAD:
-+        case VKD3DSIH_SAMPLE_LOD:
-+            shader_glsl_sample(gen, ins);
-+            break;
-+        case VKD3DSIH_GEO:
-+        case VKD3DSIH_IGE:
-+            shader_glsl_relop(gen, ins, ">=", "greaterThanEqual");
-+            break;
-+        case VKD3DSIH_IF:
-+            shader_glsl_if(gen, ins);
-+            break;
-+        case VKD3DSIH_MAD:
-+            shader_glsl_intrinsic(gen, ins, "fma");
-+            break;
-+        case VKD3DSIH_ILT:
-+        case VKD3DSIH_LTO:
-+        case VKD3DSIH_ULT:
-+            shader_glsl_relop(gen, ins, "<", "lessThan");
-+            break;
-+        case VKD3DSIH_IMAX:
-+        case VKD3DSIH_MAX:
-+        case VKD3DSIH_UMAX:
-+            shader_glsl_intrinsic(gen, ins, "max");
-+            break;
-+        case VKD3DSIH_MIN:
-+        case VKD3DSIH_UMIN:
-+            shader_glsl_intrinsic(gen, ins, "min");
-+            break;
-+        case VKD3DSIH_IMUL:
-+            shader_glsl_mul_extended(gen, ins);
-+            break;
-+        case VKD3DSIH_INE:
-+        case VKD3DSIH_NEU:
-+            shader_glsl_relop(gen, ins, "!=", "notEqual");
-+            break;
-+        case VKD3DSIH_INEG:
-+            shader_glsl_unary_op(gen, ins, "-");
-+            break;
-+        case VKD3DSIH_ISHL:
-+            shader_glsl_binop(gen, ins, "<<");
-+            break;
-+        case VKD3DSIH_ISHR:
-+        case VKD3DSIH_USHR:
-+            shader_glsl_binop(gen, ins, ">>");
-+            break;
-+        case VKD3DSIH_ITOF:
-+        case VKD3DSIH_UTOF:
-+            shader_glsl_cast(gen, ins, "float", "vec");
-+            break;
-+        case VKD3DSIH_LD:
-+        case VKD3DSIH_LD2DMS:
-+            shader_glsl_ld(gen, ins);
-+            break;
-+        case VKD3DSIH_LD_UAV_TYPED:
-+            shader_glsl_load_uav_typed(gen, ins);
-+            break;
-+        case VKD3DSIH_LOG:
-+            shader_glsl_intrinsic(gen, ins, "log2");
-+            break;
-+        case VKD3DSIH_LOOP:
-+            shader_glsl_loop(gen);
-+            break;
-+        case VKD3DSIH_MOV:
-+            shader_glsl_mov(gen, ins);
-+            break;
-+        case VKD3DSIH_MOVC:
-+            shader_glsl_movc(gen, ins);
-+            break;
-+        case VKD3DSIH_MUL:
-+            shader_glsl_binop(gen, ins, "*");
-+            break;
-+        case VKD3DSIH_NOT:
-+            shader_glsl_unary_op(gen, ins, "~");
-+            break;
-+        case VKD3DSIH_OR:
-+            shader_glsl_binop(gen, ins, "|");
-+            break;
-+        case VKD3DSIH_RET:
-+            shader_glsl_ret(gen, ins);
-+            break;
-+        case VKD3DSIH_ROUND_NE:
-+            shader_glsl_intrinsic(gen, ins, "roundEven");
-+            break;
-+        case VKD3DSIH_ROUND_NI:
-+            shader_glsl_intrinsic(gen, ins, "floor");
-+            break;
-+        case VKD3DSIH_ROUND_PI:
-+            shader_glsl_intrinsic(gen, ins, "ceil");
-+            break;
-+        case VKD3DSIH_ROUND_Z:
-+            shader_glsl_intrinsic(gen, ins, "trunc");
-+            break;
-+        case VKD3DSIH_RSQ:
-+            shader_glsl_intrinsic(gen, ins, "inversesqrt");
-+            break;
-+        case VKD3DSIH_SQRT:
-+            shader_glsl_intrinsic(gen, ins, "sqrt");
-+            break;
-+        case VKD3DSIH_STORE_UAV_TYPED:
-+            shader_glsl_store_uav_typed(gen, ins);
-+            break;
-+        case VKD3DSIH_SWITCH:
-+            shader_glsl_switch(gen, ins);
-+            break;
-+        default:
-+            shader_glsl_unhandled(gen, ins);
-+            break;
-+    }
-+static bool shader_glsl_check_shader_visibility(const struct vkd3d_glsl_generator *gen,
-+        enum vkd3d_shader_visibility visibility)
-+    enum vkd3d_shader_type t = gen->program->shader_version.type;
-+    switch (visibility)
-+    {
-+            return true;
-+            return t == VKD3D_SHADER_TYPE_VERTEX;
-+            return t == VKD3D_SHADER_TYPE_HULL;
-+            return t == VKD3D_SHADER_TYPE_DOMAIN;
-+            return t == VKD3D_SHADER_TYPE_GEOMETRY;
-+            return t == VKD3D_SHADER_TYPE_PIXEL;
-+            return t == VKD3D_SHADER_TYPE_COMPUTE;
-+        default:
-+            WARN("Invalid shader visibility %#x.\n", visibility);
-+            return false;
-+    }
-+static bool shader_glsl_get_uav_binding(const struct vkd3d_glsl_generator *gen, unsigned int register_space,
-+        unsigned int register_idx, enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx)
-+    const struct vkd3d_shader_interface_info *interface_info = gen->interface_info;
-+    const struct vkd3d_shader_resource_binding *binding;
-+    enum vkd3d_shader_binding_flag resource_type_flag;
-+    unsigned int i;
-+    if (!interface_info)
-+        return false;
-+    resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER
-+    for (i = 0; i < interface_info->binding_count; ++i)
-+    {
-+        binding = &interface_info->bindings[i];
-+        if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
-+            continue;
-+        if (binding->register_space != register_space)
-+            continue;
-+        if (binding->register_index != register_idx)
-+            continue;
-+        if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility))
-+            continue;
-+        if (!(binding->flags & resource_type_flag))
-+            continue;
-+        *binding_idx = i;
-+        return true;
-+    }
-+    return false;
-+static void shader_glsl_generate_uav_declaration(struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_descriptor_info1 *uav)
-+    const struct glsl_resource_type_info *resource_type_info;
-+    const char *image_type_prefix, *image_type, *read_format;
-+    const struct vkd3d_shader_descriptor_binding *binding;
-+    const struct vkd3d_shader_descriptor_offset *offset;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    enum vkd3d_shader_component_type component_type;
-+    unsigned int binding_idx;
-+    if (uav->count != 1)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                "UAV %u has unsupported descriptor array size %u.", uav->register_id, uav->count);
-+        return;
-+    }
-+    if (!shader_glsl_get_uav_binding(gen, uav->register_space,
-+            uav->register_index, uav->resource_type, &binding_idx))
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "No descriptor binding specified for UAV %u.", uav->register_id);
-+        return;
-+    }
-+    binding = &gen->interface_info->bindings[binding_idx].binding;
-+    if (binding->set != 0)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "Unsupported binding set %u specified for UAV %u.", binding->set, uav->register_id);
-+        return;
-+    }
-+    if (binding->count != 1)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "Unsupported binding count %u specified for UAV %u.", binding->count, uav->register_id);
-+        return;
-+    }
-+    if (gen->offset_info && gen->offset_info->binding_offsets)
-+    {
-+        offset = &gen->offset_info->binding_offsets[binding_idx];
-+        if (offset->static_offset || offset->dynamic_offset_index != ~0u)
-+        {
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled descriptor offset specified for UAV %u.",
-+                    uav->register_id);
-+            return;
-+        }
-+    }
-+    if ((resource_type_info = shader_glsl_get_resource_type_info(uav->resource_type)))
-+    {
-+        image_type = resource_type_info->type_suffix;
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled UAV type %#x.", uav->resource_type);
-+        image_type = "<unhandled image type>";
-+    }
-+    switch ((component_type = vkd3d_component_type_from_resource_data_type(uav->resource_data_type)))
-+    {
-+            image_type_prefix = "u";
-+            read_format = "r32ui";
-+            break;
-+            image_type_prefix = "i";
-+            read_format = "r32i";
-+            break;
-+        default:
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled component type %#x for UAV %u.",
-+                    component_type, uav->register_id);
-+            /* fall through */
-+            image_type_prefix = "";
-+            read_format = "r32f";
-+            break;
-+    }
-+    vkd3d_string_buffer_printf(buffer, "layout(binding = %u", binding->binding);
-+        vkd3d_string_buffer_printf(buffer, ", %s) ", read_format);
-+    else
-+        vkd3d_string_buffer_printf(buffer, ") writeonly ");
-+    vkd3d_string_buffer_printf(buffer, "uniform %simage%s ", image_type_prefix, image_type);
-+    shader_glsl_print_image_name(buffer, gen, uav->register_index, uav->register_space);
-+    vkd3d_string_buffer_printf(buffer, ";\n");
-+static bool shader_glsl_get_cbv_binding(const struct vkd3d_glsl_generator *gen,
-+        unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx)
-+    const struct vkd3d_shader_interface_info *interface_info = gen->interface_info;
-+    const struct vkd3d_shader_resource_binding *binding;
-+    unsigned int i;
-+    if (!interface_info)
-+        return false;
-+    for (i = 0; i < interface_info->binding_count; ++i)
-+    {
-+        binding = &interface_info->bindings[i];
-+        if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV)
-+            continue;
-+        if (binding->register_space != register_space)
-+            continue;
-+        if (binding->register_index != register_idx)
-+            continue;
-+        if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility))
-+            continue;
-+        if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER))
-+            continue;
-+        *binding_idx = i;
-+        return true;
-+    }
-+    return false;
-+static void shader_glsl_generate_cbv_declaration(struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_descriptor_info1 *cbv)
-+    const struct vkd3d_shader_descriptor_binding *binding;
-+    const struct vkd3d_shader_descriptor_offset *offset;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const char *prefix = gen->prefix;
-+    unsigned int binding_idx;
-+    size_t size;
-+    if (cbv->count != 1)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count);
-+        return;
-+    }
-+    if (!shader_glsl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx))
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "No descriptor binding specified for constant buffer %u.", cbv->register_id);
-+        return;
-+    }
-+    binding = &gen->interface_info->bindings[binding_idx].binding;
-+    if (binding->set != 0)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id);
-+        return;
-+    }
-+    if (binding->count != 1)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id);
-+        return;
-+    }
-+    if (gen->offset_info && gen->offset_info->binding_offsets)
-+    {
-+        offset = &gen->offset_info->binding_offsets[binding_idx];
-+        if (offset->static_offset || offset->dynamic_offset_index != ~0u)
-+        {
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled descriptor offset specified for constant buffer %u.",
-+                    cbv->register_id);
-+            return;
-+        }
-+    }
-+    size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t));
-+    size /= VKD3D_VEC4_SIZE * sizeof(uint32_t);
-+    vkd3d_string_buffer_printf(buffer,
-+            "layout(std140, binding = %u) uniform block_%s_cb_%u { vec4 %s_cb_%u[%zu]; };\n",
-+            binding->binding, prefix, cbv->register_id, prefix, cbv->register_id, size);
-+static bool shader_glsl_get_combined_sampler_binding(const struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_combined_resource_sampler_info *crs,
-+        enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx)
-+    const struct vkd3d_shader_interface_info *interface_info = gen->interface_info;
-+    const struct vkd3d_shader_combined_resource_sampler *s;
-+    enum vkd3d_shader_binding_flag resource_type_flag;
-+    unsigned int i;
-+    if (!interface_info)
-+        return false;
-+    resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER
-+    for (i = 0; i < interface_info->combined_sampler_count; ++i)
-+    {
-+        s = &interface_info->combined_samplers[i];
-+        if (s->resource_space != crs->resource_space)
-+            continue;
-+        if (s->resource_index != crs->resource_index)
-+            continue;
-+        if (crs->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX)
-+        {
-+            if (s->sampler_space != crs->sampler_space)
-+                continue;
-+            if (s->sampler_index != crs->sampler_index)
-+                continue;
-+        }
-+        if (!shader_glsl_check_shader_visibility(gen, s->shader_visibility))
-+            continue;
-+        if (!(s->flags & resource_type_flag))
-+            continue;
-+        *binding_idx = i;
-+        return true;
-+    }
-+    return false;
-+static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator *gen,
-+        const struct vkd3d_shader_combined_resource_sampler_info *crs)
-+    const struct vkd3d_shader_descriptor_info1 *sampler, *srv;
-+    const struct glsl_resource_type_info *resource_type_info;
-+    const struct vkd3d_shader_descriptor_binding *binding;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    enum vkd3d_shader_component_type component_type;
-+    const char *sampler_type, *sampler_type_prefix;
-+    enum vkd3d_shader_resource_type resource_type;
-+    unsigned int binding_idx;
-+    bool shadow = false;
-+    if (crs->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX)
-+    {
-+        if (!(sampler = shader_glsl_get_descriptor(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER,
-+                crs->sampler_index, crs->sampler_space)))
-+        {
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: No descriptor found for sampler %u, space %u.",
-+                    crs->sampler_index, crs->sampler_space);
-+            return;
-+        }
-+    }
-+    if (!(srv = shader_glsl_get_descriptor(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV,
-+            crs->resource_index, crs->resource_space)))
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: No descriptor found for resource %u, space %u.",
-+                crs->resource_index, crs->resource_space);
-+        return;
-+    }
-+    resource_type = srv->resource_type;
-+    if (srv->sample_count == 1)
-+    {
-+        /* The OpenGL API distinguishes between multi-sample textures with
-+         * sample count 1 and single-sample textures. Direct3D and Vulkan
-+         * don't make this distinction at the API level, but Direct3D shaders
-+         * are capable of expressing both. We therefore map such multi-sample
-+         * textures to their single-sample equivalents here. */
-+        if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS)
-+            resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-+        else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY)
-+            resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
-+    }
-+    if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
-+    {
-+        sampler_type = resource_type_info->type_suffix;
-+        if (shadow && !resource_type_info->shadow)
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
-+                    "Comparison samplers are not supported with resource type %#x.", resource_type);
-+    }
-+    else
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled resource type %#x for combined resource/sampler "
-+                "for resource %u, space %u and sampler %u, space %u.", resource_type,
-+                crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
-+        sampler_type = "<unhandled sampler type>";
-+    }
-+    switch ((component_type = vkd3d_component_type_from_resource_data_type(srv->resource_data_type)))
-+    {
-+            sampler_type_prefix = "u";
-+            break;
-+            sampler_type_prefix = "i";
-+            break;
-+            sampler_type_prefix = "";
-+            break;
-+        default:
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled component type %#x for combined resource/sampler "
-+                    "for resource %u, space %u and sampler %u, space %u.", component_type,
-+                    crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
-+            sampler_type_prefix = "";
-+            break;
-+    }
-+    if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx))
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "No descriptor binding specified for combined resource/sampler "
-+                "for resource %u, space %u and sampler %u, space %u.",
-+                crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
-+        return;
-+    }
-+    binding = &gen->interface_info->combined_samplers[binding_idx].binding;
-+    if (binding->set != 0)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "Unsupported binding set %u specified for combined resource/sampler "
-+                "for resource %u, space %u and sampler %u, space %u.", binding->set,
-+                crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
-+        return;
-+    }
-+    if (binding->count != 1)
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
-+                "Unsupported binding count %u specified for combined resource/sampler "
-+                "for resource %u, space %u and sampler %u, space %u.", binding->count,
-+                crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
-+        return;
-+    }
-+    vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %ssampler%s%s ",
-+            binding->binding, sampler_type_prefix, sampler_type, shadow ? "Shadow" : "");
-+    shader_glsl_print_combined_sampler_name(buffer, gen, crs->resource_index,
-+            crs->resource_space, crs->sampler_index, crs->sampler_space);
-+    vkd3d_string_buffer_printf(buffer, ";\n");
-+static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_generator *gen)
-+    const struct vkd3d_shader_scan_combined_resource_sampler_info *sampler_info = gen->combined_sampler_info;
-+    const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info;
-+    const struct vkd3d_shader_descriptor_info1 *descriptor;
-+    unsigned int i;
-+    for (i = 0; i < info->descriptor_count; ++i)
-+    {
-+        descriptor = &info->descriptors[i];
-+        switch (descriptor->type)
-+        {
-+                /* GLSL uses combined resource/sampler descriptors.*/
-+                break;
-+                shader_glsl_generate_uav_declaration(gen, descriptor);
-+                break;
-+                shader_glsl_generate_cbv_declaration(gen, descriptor);
-+                break;
-+            default:
-+                vkd3d_string_buffer_printf(gen->buffer, "/* <unhandled descriptor type %#x> */\n", descriptor->type);
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type);
-+                break;
-+        }
-+    }
-+    for (i = 0; i < sampler_info->combined_sampler_count; ++i)
-+    {
-+        shader_glsl_generate_sampler_declaration(gen, &sampler_info->combined_samplers[i]);
-+    }
-+    if (info->descriptor_count)
-+        vkd3d_string_buffer_printf(gen->buffer, "\n");
-+static const struct signature_element *signature_get_element_by_location(
-+        const struct shader_signature *signature, unsigned int location)
-+    const struct signature_element *e;
-+    unsigned int i;
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location != location)
-+            continue;
-+        return e;
-+    }
-+    return NULL;
-+static const char *shader_glsl_get_interpolation(struct vkd3d_glsl_generator *gen,
-+        const struct shader_signature *signature, const char *type, unsigned int location)
-+    enum vkd3d_shader_interpolation_mode m;
-+    const struct signature_element *e;
-+    if ((e = signature_get_element_by_location(signature, location)))
-+        m = e->interpolation_mode;
-+    else
-+        m = VKD3DSIM_NONE;
-+    switch (m)
-+    {
-+        case VKD3DSIM_NONE:
-+        case VKD3DSIM_LINEAR:
-+            return "";
-+        case VKD3DSIM_CONSTANT:
-+            return "flat ";
-+        default:
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled interpolation mode %#x for %s location %u.", m, type, location);
-+            return "";
-+    }
-+static void shader_glsl_generate_interface_block(struct vkd3d_glsl_generator *gen,
-+        const struct shader_signature *signature, const char *type, unsigned int count)
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const char *interpolation;
-+    unsigned int i;
-+    vkd3d_string_buffer_printf(buffer, "%s shader_in_out\n{\n", type);
-+    for (i = 0; i < count; ++i)
-+    {
-+        interpolation = shader_glsl_get_interpolation(gen, signature, type, i);
-+        vkd3d_string_buffer_printf(buffer, "    %svec4 reg_%u;\n", interpolation, i);
-+    }
-+    vkd3d_string_buffer_printf(buffer, "} shader_%s;\n", type);
-+static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->input_signature;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i, count;
-+    if (!gen->interstage_input)
-+    {
-+        for (i = 0, count = 0; i < signature->element_count; ++i)
-+        {
-+            e = &signature->elements[i];
-+            if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED || e->sysval_semantic)
-+                continue;
-+            if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision);
-+                continue;
-+            }
-+            if (e->interpolation_mode != VKD3DSIM_NONE)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode);
-+                continue;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "layout(location = %u) in ", e->target_location);
-+            switch (e->component_type)
-+            {
-+                case VKD3D_SHADER_COMPONENT_UINT:
-+                    vkd3d_string_buffer_printf(buffer, "uvec4");
-+                    break;
-+                case VKD3D_SHADER_COMPONENT_INT:
-+                    vkd3d_string_buffer_printf(buffer, "ivec4");
-+                    break;
-+                case VKD3D_SHADER_COMPONENT_FLOAT:
-+                    vkd3d_string_buffer_printf(buffer, "vec4");
-+                    break;
-+                default:
-+                    vkd3d_string_buffer_printf(buffer, "<unhandled type %#x>", e->component_type);
-+                    vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                            "Internal compiler error: Unhandled input component type %#x.", e->component_type);
-+                    break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, " shader_in_%u;\n", i);
-+            ++count;
-+        }
-+        if (count)
-+            vkd3d_string_buffer_printf(buffer, "\n");
-+    }
-+    else if (gen->limits.input_count)
-+    {
-+        shader_glsl_generate_interface_block(gen, signature, "in", gen->limits.input_count);
-+        vkd3d_string_buffer_printf(buffer, "\n");
-+    }
-+static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->output_signature;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i, count;
-+    if (!gen->interstage_output)
-+    {
-+        for (i = 0, count = 0; i < signature->element_count; ++i)
-+        {
-+            e = &signature->elements[i];
-+            if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+                continue;
-+            if (e->sysval_semantic != VKD3D_SHADER_SV_TARGET)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic);
-+                continue;
-+            }
-+            if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision);
-+                continue;
-+            }
-+            if (e->interpolation_mode != VKD3DSIM_NONE)
-+            {
-+                vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                        "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode);
-+                continue;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "layout(location = %u) out ", e->target_location);
-+            switch (e->component_type)
-+            {
-+                case VKD3D_SHADER_COMPONENT_UINT:
-+                    vkd3d_string_buffer_printf(buffer, "uvec4");
-+                    break;
-+                case VKD3D_SHADER_COMPONENT_INT:
-+                    vkd3d_string_buffer_printf(buffer, "ivec4");
-+                    break;
-+                case VKD3D_SHADER_COMPONENT_FLOAT:
-+                    vkd3d_string_buffer_printf(buffer, "vec4");
-+                    break;
-+                default:
-+                    vkd3d_string_buffer_printf(buffer, "<unhandled type %#x>", e->component_type);
-+                    vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                            "Internal compiler error: Unhandled output component type %#x.", e->component_type);
-+                    break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", e->semantic_index);
-+            ++count;
-+        }
-+        if (count)
-+            vkd3d_string_buffer_printf(buffer, "\n");
-+    }
-+    else if (gen->limits.output_count)
-+    {
-+        shader_glsl_generate_interface_block(gen, signature, "out", gen->limits.output_count);
-+        vkd3d_string_buffer_printf(buffer, "\n");
-+    }
-+static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer *buffer,
-+        struct vkd3d_glsl_generator *gen, enum vsir_global_flags flags)
-+    {
-+        vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n");
-+    }
-+    if (flags)
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags);
-+static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen)
-+    const struct vsir_program *program = gen->program;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct vsir_thread_group_size *group_size;
-+    if (program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE)
-+    {
-+        group_size = &program->thread_group_size;
-+        vkd3d_string_buffer_printf(buffer, "layout(local_size_x = %u, local_size_y = %u, local_size_z = %u) in;\n\n",
-+                group_size->x, group_size->y, group_size->z);
-+    }
-+    shader_glsl_handle_global_flags(buffer, gen, program->global_flags);
-+    shader_glsl_generate_descriptor_declarations(gen);
-+    shader_glsl_generate_input_declarations(gen);
-+    shader_glsl_generate_output_declarations(gen);
-+    if (gen->limits.input_count)
-+        vkd3d_string_buffer_printf(buffer, "vec4 %s_in[%u];\n", gen->prefix, gen->limits.input_count);
-+    if (gen->limits.output_count)
-+        vkd3d_string_buffer_printf(buffer, "vec4 %s_out[%u];\n", gen->prefix, gen->limits.output_count);
-+    if (program->temp_count)
-+        vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n", program->temp_count);
-+    vkd3d_string_buffer_printf(buffer, "\n");
-+static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out)
-+    const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    unsigned int i;
-+    void *code;
-+    MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
-+    vkd3d_string_buffer_printf(buffer, "#version 440\n\n");
-+    vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL));
-+    shader_glsl_generate_declarations(gen);
-+    vkd3d_string_buffer_printf(buffer, "void main()\n{\n");
-+    ++gen->indent;
-+    shader_glsl_shader_prologue(gen);
-+    for (i = 0; i < instructions->count; ++i)
-+    {
-+        vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]);
-+    }
-+    vkd3d_string_buffer_printf(buffer, "}\n");
-+    if (TRACE_ON())
-+        vkd3d_string_buffer_trace(buffer);
-+    if (gen->failed)
-+        return VKD3D_ERROR_INVALID_SHADER;
-+    if ((code = vkd3d_malloc(buffer->buffer_size)))
-+    {
-+        memcpy(code, buffer->buffer, buffer->content_size);
-+        out->size = buffer->content_size;
-+        out->code = code;
-+    }
-+    else return VKD3D_ERROR_OUT_OF_MEMORY;
-+    return VKD3D_OK;
-+static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen)
-+    vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer);
-+    vkd3d_string_buffer_cache_cleanup(&gen->string_buffers);
-+static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_version *version)
-+    struct shader_limits *limits = &gen->limits;
-+    if (version->major < 4 || version->major >= 6)
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled shader version %u.%u.", version->major, version->minor);
-+    switch (version->type)
-+    {
-+        case VKD3D_SHADER_TYPE_VERTEX:
-+            limits->input_count = 32;
-+            limits->output_count = 32;
-+            break;
-+        case VKD3D_SHADER_TYPE_PIXEL:
-+            limits->input_count = 32;
-+            limits->output_count = 8;
-+            break;
-+            limits->input_count = 0;
-+            limits->output_count = 0;
-+            break;
-+        default:
-+            vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                    "Internal compiler error: Unhandled shader type %#x.", version->type);
-+            limits->input_count = 0;
-+            limits->output_count = 0;
-+            break;
-+    }
-+static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen,
-+        struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info,
-+        const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
-+        const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info,
-+        struct vkd3d_shader_message_context *message_context)
-+    enum vkd3d_shader_type type = program->shader_version.type;
-+    memset(gen, 0, sizeof(*gen));
-+    gen->program = program;
-+    vkd3d_string_buffer_cache_init(&gen->string_buffers);
-+    gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers);
-+    gen->location.source_name = compile_info->source_name;
-+    gen->message_context = message_context;
-+    if (!(gen->prefix = shader_glsl_get_prefix(type)))
-+    {
-+        vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
-+                "Internal compiler error: Unhandled shader type %#x.", type);
-+        gen->prefix = "unknown";
-+    }
-+    shader_glsl_init_limits(gen, &program->shader_version);
-+    gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX && type != VKD3D_SHADER_TYPE_COMPUTE;
-+    gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL && type != VKD3D_SHADER_TYPE_COMPUTE;
-+    gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO);
-+    gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO);
-+    gen->descriptor_info = descriptor_info;
-+    gen->combined_sampler_info = combined_sampler_info;
-+int glsl_compile(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
-+        const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info,
-+        const struct vkd3d_shader_compile_info *compile_info,
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
-+    struct vkd3d_glsl_generator generator;
-+    int ret;
-+    if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0)
-+        return ret;
-+    VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
--    vkd3d_glsl_generator_init(&generator, program, message_context);
-+    vkd3d_glsl_generator_init(&generator, program, compile_info,
-+            descriptor_info, combined_sampler_info, message_context);
-     ret = vkd3d_glsl_generator_generate(&generator, out);
-     vkd3d_glsl_generator_cleanup(&generator);
-diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
-index bd5baacd83d..3be9ba9979b 100644
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
-@@ -254,6 +254,47 @@ bool hlsl_type_is_resource(const struct hlsl_type *type)
-     }
- }
-+bool hlsl_type_is_shader(const struct hlsl_type *type)
-+    switch (type->class)
-+    {
-+        case HLSL_CLASS_ARRAY:
-+            return hlsl_type_is_shader(type->e.array.type);
-+        case HLSL_CLASS_HULL_SHADER:
-+            return true;
-+        case HLSL_CLASS_SCALAR:
-+        case HLSL_CLASS_VECTOR:
-+        case HLSL_CLASS_MATRIX:
-+        case HLSL_CLASS_STRUCT:
-+        case HLSL_CLASS_ERROR:
-+        case HLSL_CLASS_PASS:
-+        case HLSL_CLASS_SAMPLER:
-+        case HLSL_CLASS_STRING:
-+        case HLSL_CLASS_TECHNIQUE:
-+        case HLSL_CLASS_TEXTURE:
-+        case HLSL_CLASS_UAV:
-+        case HLSL_CLASS_BLEND_STATE:
-+        case HLSL_CLASS_VOID:
-+        case HLSL_CLASS_NULL:
-+            return false;
-+    }
-+    return false;
- /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or
-  * resources, since for both their data types span across a single regset. */
- static enum hlsl_regset type_get_regset(const struct hlsl_type *type)
-@@ -379,6 +420,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_PASS:
-@@ -393,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type
-         case HLSL_CLASS_HULL_SHADER:
-         case HLSL_CLASS_BLEND_STATE:
-         case HLSL_CLASS_NULL:
-             break;
-     }
-@@ -455,6 +498,7 @@ static bool type_is_single_component(const struct hlsl_type *type)
-     {
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_SCALAR:
-         case HLSL_CLASS_SAMPLER:
-@@ -483,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type)
-         case HLSL_CLASS_PASS:
-         case HLSL_CLASS_TECHNIQUE:
-         case HLSL_CLASS_VOID:
-             break;
-     }
-     vkd3d_unreachable();
-@@ -631,12 +676,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty
-                 break;
-             case HLSL_CLASS_EFFECT_GROUP:
-+            case HLSL_CLASS_ERROR:
-             case HLSL_CLASS_PASS:
-             case HLSL_CLASS_TECHNIQUE:
-             case HLSL_CLASS_VOID:
-             case HLSL_CLASS_SCALAR:
-             case HLSL_CLASS_CONSTANT_BUFFER:
-             case HLSL_CLASS_NULL:
-+            case HLSL_CLASS_STREAM_OUTPUT:
-                 vkd3d_unreachable();
-         }
-         type = next_type;
-@@ -855,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba
-     return type;
- }
-+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx,
-+        enum hlsl_so_object_type so_type, struct hlsl_type *data_type)
-+    struct hlsl_type *type;
-+    if (!(type = hlsl_alloc(ctx, sizeof(*type))))
-+        return NULL;
-+    type->class = HLSL_CLASS_STREAM_OUTPUT;
-+    type->e.so.so_type = so_type;
-+    type->e.so.type = data_type;
-+    list_add_tail(&ctx->types, &type->entry);
-+    return type;
- struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
-         struct hlsl_struct_field *fields, size_t field_count)
- {
-@@ -930,6 +993,7 @@ static const char * get_case_insensitive_typename(const char *name)
-     {
-         "dword",
-         "float",
-+        "geometryshader",
-         "matrix",
-         "pixelshader",
-         "texture",
-@@ -1021,6 +1085,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type)
-+        case HLSL_CLASS_ERROR:
-@@ -1041,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type)
-         case HLSL_CLASS_PASS:
-         case HLSL_CLASS_TECHNIQUE:
-         case HLSL_CLASS_VOID:
-             break;
-     }
-@@ -1112,9 +1178,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2
-             return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format);
-+            if (t1->e.so.so_type != t2->e.so.so_type)
-+                return false;
-+            return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type);
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_PASS:
-@@ -1575,7 +1647,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp
- {
-     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2};
--    VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type));
-     return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
- }
-@@ -1589,6 +1660,16 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex
-     return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
- }
-+static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx)
-+    static const struct vkd3d_shader_location loc = {.source_name = "<error>"};
-+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
-+    /* Use a dummy location; we should never report any messages related to
-+     * this expression. */
-+    return hlsl_new_expr(ctx, HLSL_OP0_ERROR, operands, ctx->builtin_types.error, &loc);
- struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
-         struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
- {
-@@ -1640,6 +1721,22 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *
-     return &s->node;
- }
-+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx,
-+        struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_vsir_instruction_ref *vsir_instr;
-+    if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr))))
-+        return NULL;
-+    init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc);
-+    vsir_instr->vsir_instr_idx = vsir_instr_idx;
-+    if (reg)
-+        vsir_instr->node.reg = *reg;
-+    return &vsir_instr->node;
- struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
-         struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc)
- {
-@@ -1792,6 +1889,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned
-     return &swizzle->node;
- }
-+struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type,
-+        const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count,
-+        struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc)
-+    const struct hlsl_profile_info *profile_info = NULL;
-+    struct hlsl_ir_compile *compile;
-+    struct hlsl_type *type = NULL;
-+    unsigned int i;
-+    switch (compile_type)
-+    {
-+            if (!(profile_info = hlsl_get_target_info(profile_name)))
-+            {
-+                hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Unknown profile \"%s\".", profile_name);
-+                return NULL;
-+            }
-+            if (profile_info->type == VKD3D_SHADER_TYPE_PIXEL)
-+                type = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true);
-+            else if (profile_info->type == VKD3D_SHADER_TYPE_VERTEX)
-+                type = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true);
-+            if (!type)
-+            {
-+                hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Invalid profile \"%s\".", profile_name);
-+                return NULL;
-+            }
-+            break;
-+            type = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true);
-+            break;
-+    }
-+    if (!(compile = hlsl_alloc(ctx, sizeof(*compile))))
-+        return NULL;
-+    init_node(&compile->node, HLSL_IR_COMPILE, type, loc);
-+    compile->compile_type = compile_type;
-+    compile->profile = profile_info;
-+    hlsl_block_init(&compile->instrs);
-+    hlsl_block_add_block(&compile->instrs, args_instrs);
-+    compile->args_count = args_count;
-+    if (!(compile->args = hlsl_alloc(ctx, sizeof(*compile->args) * args_count)))
-+    {
-+        vkd3d_free(compile);
-+        return NULL;
-+    }
-+    for (i = 0; i < compile->args_count; ++i)
-+        hlsl_src_from_node(&compile->args[i], args[i]);
-+    return &compile->node;
-+bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block,
-+        struct hlsl_state_block_entry *entry)
-+    if (!vkd3d_array_reserve((void **)&state_block->entries,
-+            &state_block->capacity, state_block->count + 1,
-+            sizeof(*state_block->entries)))
-+        return false;
-+    state_block->entries[state_block->count++] = entry;
-+    return true;
-+struct hlsl_ir_node *hlsl_new_sampler_state(struct hlsl_ctx *ctx,
-+        const struct hlsl_state_block *state_block, struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_sampler_state *sampler_state;
-+    struct hlsl_type *type = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC];
-+    if (!(sampler_state = hlsl_alloc(ctx, sizeof(*sampler_state))))
-+        return NULL;
-+    init_node(&sampler_state->node, HLSL_IR_SAMPLER_STATE, type, loc);
-+    if (!(sampler_state->state_block = hlsl_alloc(ctx, sizeof(*sampler_state->state_block))))
-+    {
-+        vkd3d_free(sampler_state);
-+        return NULL;
-+    }
-+    if (state_block)
-+    {
-+        for (unsigned int i = 0; i < state_block->count; ++i)
-+        {
-+            const struct hlsl_state_block_entry *src = state_block->entries[i];
-+            struct hlsl_state_block_entry *entry;
-+            if (!(entry = clone_stateblock_entry(ctx, src, src->name, src->lhs_has_index, src->lhs_index, false, 0)))
-+            {
-+                hlsl_free_instr(&sampler_state->node);
-+                return NULL;
-+            }
-+            if (!hlsl_state_block_add_entry(sampler_state->state_block, entry))
-+            {
-+                hlsl_free_instr(&sampler_state->node);
-+                return NULL;
-+            }
-+        }
-+    }
-+    return &sampler_state->node;
- struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name,
-         struct vkd3d_shader_location *loc)
- {
-@@ -2142,6 +2351,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr
-     return dst;
- }
-+static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx,
-+        struct clone_instr_map *map, struct hlsl_ir_compile *compile)
-+    const char *profile_name = NULL;
-+    struct hlsl_ir_node **args;
-+    struct hlsl_ir_node *node;
-+    struct hlsl_block block;
-+    unsigned int i;
-+    if (!(clone_block(ctx, &block, &compile->instrs, map)))
-+        return NULL;
-+    if (!(args = hlsl_alloc(ctx, sizeof(*args) * compile->args_count)))
-+    {
-+        hlsl_block_cleanup(&block);
-+        return NULL;
-+    }
-+    for (i = 0; i < compile->args_count; ++i)
-+    {
-+        args[i] = map_instr(map, compile->args[i].node);
-+        VKD3D_ASSERT(args[i]);
-+    }
-+    if (compile->profile)
-+        profile_name = compile->profile->name;
-+    if (!(node = hlsl_new_compile(ctx, compile->compile_type, profile_name,
-+            args, compile->args_count, &block, &compile->node.loc)))
-+    {
-+        hlsl_block_cleanup(&block);
-+        vkd3d_free(args);
-+        return NULL;
-+    }
-+    vkd3d_free(args);
-+    return node;
-+static struct hlsl_ir_node *clone_sampler_state(struct hlsl_ctx *ctx,
-+        struct clone_instr_map *map, struct hlsl_ir_sampler_state *sampler_state)
-+    return hlsl_new_sampler_state(ctx, sampler_state->state_block,
-+            &sampler_state->node.loc);
- static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx,
-         struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant)
- {
-@@ -2149,8 +2403,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx,
- }
- struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
--        struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index,
--        unsigned int lhs_index, unsigned int arg_index)
-+        const struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index,
-+        unsigned int lhs_index, bool single_arg, unsigned int arg_index)
- {
-     struct hlsl_state_block_entry *entry;
-     struct clone_instr_map map = { 0 };
-@@ -2166,7 +2420,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
-         return NULL;
-     }
--    entry->args_count = 1;
-+    if (single_arg)
-+        entry->args_count = 1;
-+    else
-+        entry->args_count = src->args_count;
-     if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count)))
-     {
-         hlsl_free_state_block_entry(entry);
-@@ -2179,7 +2437,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
-         hlsl_free_state_block_entry(entry);
-         return NULL;
-     }
--    clone_src(&map, entry->args, &src->args[arg_index]);
-+    if (single_arg)
-+    {
-+        clone_src(&map, entry->args, &src->args[arg_index]);
-+    }
-+    else
-+    {
-+        for (unsigned int i = 0; i < src->args_count; ++i)
-+            clone_src(&map, &entry->args[i], &src->args[i]);
-+    }
-     vkd3d_free(map.instrs);
-     return entry;
-@@ -2284,8 +2551,17 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
-         case HLSL_IR_SWIZZLE:
-             return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
-+        case HLSL_IR_COMPILE:
-+            return clone_compile(ctx, map, hlsl_ir_compile(instr));
-+        case HLSL_IR_SAMPLER_STATE:
-+            return clone_sampler_state(ctx, map, hlsl_ir_sampler_state(instr));
-             return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr));
-+            vkd3d_unreachable();
-     }
-     vkd3d_unreachable();
-@@ -2314,6 +2590,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx,
-     decl->return_type = return_type;
-     decl->parameters = *parameters;
-     decl->loc = *loc;
-+    list_init(&decl->extern_vars);
-     if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void))
-     {
-@@ -2523,6 +2800,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
-             return string;
-         case HLSL_CLASS_TEXTURE:
-+            if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-+            {
-+                vkd3d_string_buffer_printf(string, "ByteAddressBuffer");
-+                return string;
-+            }
-             if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
-             {
-                 vkd3d_string_buffer_printf(string, "Texture");
-@@ -2548,6 +2831,11 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
-             return string;
-         case HLSL_CLASS_UAV:
-+            if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-+            {
-+                vkd3d_string_buffer_printf(string, "RWByteAddressBuffer");
-+                return string;
-+            }
-             if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER)
-                 vkd3d_string_buffer_printf(string, "RWBuffer");
-             else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
-@@ -2570,6 +2858,24 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
-             }
-             return string;
-+        case HLSL_CLASS_ERROR:
-+            vkd3d_string_buffer_printf(string, "<error type>");
-+            return string;
-+            if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM)
-+                vkd3d_string_buffer_printf(string, "PointStream");
-+            else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM)
-+                vkd3d_string_buffer_printf(string, "LineStream");
-+            else
-+                vkd3d_string_buffer_printf(string, "TriangleStream");
-+            if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type)))
-+            {
-+                vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer);
-+                hlsl_release_string_buffer(ctx, inner_string);
-+            }
-+            return string;
-@@ -2698,7 +3004,11 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
-         [HLSL_IR_STORE          ] = "HLSL_IR_STORE",
-         [HLSL_IR_SWITCH         ] = "HLSL_IR_SWITCH",
-         [HLSL_IR_SWIZZLE        ] = "HLSL_IR_SWIZZLE",
-+        [HLSL_IR_COMPILE]             = "HLSL_IR_COMPILE",
-     };
-     if (type >= ARRAY_SIZE(names))
-@@ -2907,6 +3217,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
- {
-     static const char *const op_names[] =
-     {
-+        [HLSL_OP0_ERROR]        = "error",
-         [HLSL_OP0_VOID]         = "void",
-         [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount",
-@@ -2924,6 +3235,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
-         [HLSL_OP1_DSY_FINE]     = "dsy_fine",
-         [HLSL_OP1_EXP2]         = "exp2",
-         [HLSL_OP1_F16TOF32]     = "f16tof32",
-+        [HLSL_OP1_F32TOF16]     = "f32tof16",
-         [HLSL_OP1_FLOOR]        = "floor",
-         [HLSL_OP1_FRACT]        = "fract",
-         [HLSL_OP1_LOG2]         = "log2",
-@@ -3146,6 +3458,40 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_
-     vkd3d_string_buffer_printf(buffer, "]");
- }
-+static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
-+        const struct hlsl_ir_compile *compile)
-+    unsigned int i;
-+    switch (compile->compile_type)
-+    {
-+            vkd3d_string_buffer_printf(buffer, "compile %s {\n", compile->profile->name);
-+            break;
-+            vkd3d_string_buffer_printf(buffer, "ConstructGSWithSO {\n");
-+            break;
-+    }
-+    dump_block(ctx, buffer, &compile->instrs);
-+    vkd3d_string_buffer_printf(buffer, "      %10s   } (", "");
-+    for (i = 0; i < compile->args_count; ++i)
-+    {
-+        dump_src(buffer, &compile->args[i]);
-+        if (i + 1 < compile->args_count)
-+            vkd3d_string_buffer_printf(buffer, ", ");
-+    }
-+    vkd3d_string_buffer_printf(buffer, ")");
-+static void dump_ir_sampler_state(struct vkd3d_string_buffer *buffer,
-+        const struct hlsl_ir_sampler_state *sampler_state)
-+    vkd3d_string_buffer_printf(buffer, "sampler_state {...}");
- static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer,
-         const struct hlsl_ir_stateblock_constant *constant)
- {
-@@ -3245,9 +3591,22 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
-             dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr));
-             break;
-+        case HLSL_IR_COMPILE:
-+            dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr));
-+            break;
-+        case HLSL_IR_SAMPLER_STATE:
-+            dump_ir_sampler_state(buffer, hlsl_ir_sampler_state(instr));
-+            break;
-             dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr));
-             break;
-+            vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u",
-+                    hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx);
-+            break;
-     }
- }
-@@ -3308,8 +3667,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
- {
-     struct hlsl_src *src, *next;
--    VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx);
--    VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy);
-+    VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx);
-+    VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy);
-     LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry)
-     {
-@@ -3459,6 +3818,24 @@ static void free_ir_index(struct hlsl_ir_index *index)
-     vkd3d_free(index);
- }
-+static void free_ir_compile(struct hlsl_ir_compile *compile)
-+    unsigned int i;
-+    for (i = 0; i < compile->args_count; ++i)
-+        hlsl_src_remove(&compile->args[i]);
-+    hlsl_block_cleanup(&compile->instrs);
-+    vkd3d_free(compile);
-+static void free_ir_sampler_state(struct hlsl_ir_sampler_state *sampler_state)
-+    if (sampler_state->state_block)
-+        hlsl_free_state_block(sampler_state->state_block);
-+    vkd3d_free(sampler_state);
- static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant)
- {
-     vkd3d_free(constant->name);
-@@ -3527,9 +3904,21 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
-             free_ir_switch(hlsl_ir_switch(node));
-             break;
-+        case HLSL_IR_COMPILE:
-+            free_ir_compile(hlsl_ir_compile(node));
-+            break;
-+        case HLSL_IR_SAMPLER_STATE:
-+            free_ir_sampler_state(hlsl_ir_sampler_state(node));
-+            break;
-             free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node));
-             break;
-+            vkd3d_free(hlsl_ir_vsir_instruction_ref(node));
-+            break;
-     }
- }
-@@ -3801,12 +4190,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
-     static const char * const names[] =
-     {
--        "float",
--        "half",
--        "double",
--        "int",
--        "uint",
--        "bool",
-+        [HLSL_TYPE_FLOAT]  = "float",
-+        [HLSL_TYPE_HALF]   = "half",
-+        [HLSL_TYPE_DOUBLE] = "double",
-+        [HLSL_TYPE_INT]    = "int",
-+        [HLSL_TYPE_UINT]   = "uint",
-+        [HLSL_TYPE_BOOL]   = "bool",
-     };
-     static const char *const variants_float[] = {"min10float", "min16float"};
-@@ -3957,6 +4346,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx)
-     ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID);
-     ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1);
-     ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING);
-+    ctx->builtin_types.error = hlsl_new_simple_type(ctx, "<error type>", HLSL_CLASS_ERROR);
-     hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string);
-     hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW));
-     hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE));
-@@ -4059,6 +4449,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil
-                 ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES;
-+                ctx->double_as_float_alias = option->value & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS;
-                 break;
-@@ -4078,6 +4469,15 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil
-         }
-     }
-+    if (!(ctx->error_instr = hlsl_new_error_expr(ctx)))
-+        return false;
-+    hlsl_block_add_instr(&ctx->static_initializers, ctx->error_instr);
-+    ctx->output_control_point_count = UINT_MAX;
-+    ctx->output_primitive = 0;
-+    ctx->partitioning = 0;
-     return true;
- }
-@@ -4089,8 +4489,6 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx)
-     struct hlsl_type *type, *next_type;
-     unsigned int i;
--    hlsl_block_cleanup(&ctx->static_initializers);
-     for (i = 0; i < ctx->source_files_count; ++i)
-         vkd3d_free((void *)ctx->source_files[i]);
-     vkd3d_free(ctx->source_files);
-@@ -4113,6 +4511,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx)
-         }
-     }
-+    hlsl_block_cleanup(&ctx->static_initializers);
-     LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry)
-     {
-         LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry)
-diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
-index 22e25b23988..4824234ab99 100644
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
-+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
-@@ -22,7 +22,6 @@
- #include "vkd3d_shader_private.h"
- #include "wine/rbtree.h"
--#include "d3dcommon.h"
- #include "d3dx9shader.h"
- /* The general IR structure is inspired by Mesa GLSL hir, even though the code
-@@ -70,6 +69,14 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned
-     return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK;
- }
-+static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle)
-+    return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0),
-+            hlsl_swizzle_get_component(swizzle, 1),
-+            hlsl_swizzle_get_component(swizzle, 2),
-+            hlsl_swizzle_get_component(swizzle, 3));
- enum hlsl_type_class
- {
-@@ -97,8 +104,10 @@ enum hlsl_type_class
- };
- enum hlsl_base_type
-@@ -128,10 +137,18 @@ enum hlsl_sampler_dim
-     /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */
- };
-+enum hlsl_so_object_type
- enum hlsl_regset
- {
-@@ -210,6 +227,12 @@ struct hlsl_type
-         } resource;
-         /* Additional field to distinguish object types. Currently used only for technique types. */
-         unsigned int version;
-+        /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */
-+        struct
-+        {
-+            struct hlsl_type *type;
-+            enum hlsl_so_object_type so_type;
-+        } so;
-     } e;
-     /* Number of numeric register components used by one value of this type, for each regset.
-@@ -316,7 +339,12 @@ enum hlsl_ir_node_type
- };
- /* Common data for every type of IR instruction node. */
-@@ -352,6 +380,9 @@ struct hlsl_block
- {
-     /* List containing instruction nodes; linked by the hlsl_ir_node.entry fields. */
-     struct list instrs;
-+    /* Instruction representing the "value" of this block, if applicable.
-+     * This may point to an instruction outside of this block! */
-+    struct hlsl_ir_node *value;
- };
- /* A reference to an instruction node (struct hlsl_ir_node), usable as a field in other structs.
-@@ -396,10 +427,12 @@ struct hlsl_attribute
- #define HLSL_MODIFIER_SINGLE             0x00020000
- #define HLSL_MODIFIER_EXPORT             0x00040000
- #define HLSL_STORAGE_ANNOTATION          0x00080000
-+#define HLSL_MODIFIER_UNORM              0x00100000
-+#define HLSL_MODIFIER_SNORM              0x00200000
-                                       HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \
--                                      HLSL_MODIFIER_COLUMN_MAJOR)
-                                            HLSL_STORAGE_NOPERSPECTIVE | HLSL_STORAGE_LINEAR)
-@@ -474,6 +507,8 @@ struct hlsl_ir_var
-      *   range). The IR instructions are numerated starting from 2, because 0 means unused, and 1
-      *   means function entry. */
-     unsigned int first_write, last_read;
-+    /* Whether the variable is read in any entry function. */
-+    bool is_read;
-     /* Offset where the variable's value is stored within its buffer in numeric register components.
-      * This in case the variable is uniform. */
-     unsigned int buffer_offset;
-@@ -498,6 +533,9 @@ struct hlsl_ir_var
-     /* Whether the shader performs dereferences with non-constant offsets in the variable. */
-     bool indexable;
-+    /* Whether this is a semantic variable that was split from an array, or is the first
-+     * element of a struct, and thus needs to be aligned when packed in the signature. */
-+    bool force_align;
-     uint32_t is_input_semantic : 1;
-     uint32_t is_output_semantic : 1;
-@@ -591,10 +629,18 @@ struct hlsl_ir_function_decl
-     unsigned int attr_count;
-     const struct hlsl_attribute *const *attrs;
-+    bool early_depth_test;
-     /* Synthetic boolean variable marking whether a return statement has been
-      * executed. Needed to deal with return statements in non-uniform control
-      * flow, since some backends can't handle them. */
-     struct hlsl_ir_var *early_return_var;
-+    /* List of all the extern semantic variables; linked by the
-+     * hlsl_ir_var.extern_entry fields. This exists as a convenience because
-+     * it is often necessary to iterate all extern variables and these can be
-+     * declared in as function parameters, or as the function return value. */
-+    struct list extern_vars;
- };
- struct hlsl_ir_call
-@@ -646,6 +692,7 @@ struct hlsl_ir_switch
- enum hlsl_ir_expr_op
- {
-     HLSL_OP0_VOID,
-@@ -663,6 +710,7 @@ enum hlsl_ir_expr_op
-     HLSL_OP1_EXP2,
-     HLSL_OP1_F16TOF32,
-+    HLSL_OP1_F32TOF16,
-     HLSL_OP1_LOG2,
-@@ -703,7 +751,7 @@ enum hlsl_ir_expr_op
-     HLSL_OP2_SLT,
-     /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy,
--     * then adds c. */
-+     * then adds c, where c must have dimx=1. */
-     HLSL_OP3_DP2ADD,
-     /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean.
-      * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */
-@@ -854,6 +902,43 @@ struct hlsl_ir_string_constant
-     char *string;
- };
-+/* Represents shader compilation call for effects, such as "CompileShader()".
-+ *
-+ * Unlike hlsl_ir_call, it is not flattened, thus, it keeps track of its
-+ * arguments and maintains its own instruction block. */
-+struct hlsl_ir_compile
-+    struct hlsl_ir_node node;
-+    enum hlsl_compile_type
-+    {
-+        /* A shader compilation through the CompileShader() function or the "compile" syntax. */
-+        /* A call to ConstructGSWithSO(), which receives a geometry shader and retrieves one as well. */
-+    } compile_type;
-+    /* Special field to store the profile argument for HLSL_COMPILE_TYPE_COMPILE. */
-+    const struct hlsl_profile_info *profile;
-+    /* Block containing the instructions required by the arguments of the
-+     * compilation call. */
-+    struct hlsl_block instrs;
-+    /* Arguments to the compilation call. For HLSL_COMPILE_TYPE_COMPILE
-+     * args[0] is an hlsl_ir_call to the specified function. */
-+    struct hlsl_src *args;
-+    unsigned int args_count;
-+/* Represents a state block initialized with the "sampler_state" keyword. */
-+struct hlsl_ir_sampler_state
-+    struct hlsl_ir_node node;
-+    struct hlsl_state_block *state_block;
- /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions,
-  *   that do not concern regular pixel, vertex, or compute shaders, except for parsing. */
- struct hlsl_ir_stateblock_constant
-@@ -862,6 +947,16 @@ struct hlsl_ir_stateblock_constant
-     char *name;
- };
-+/* A vkd3d_shader_instruction that can be inserted in a hlsl_block.
-+ * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */
-+struct hlsl_ir_vsir_instruction_ref
-+    struct hlsl_ir_node node;
-+    /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */
-+    unsigned int vsir_instr_idx;
- struct hlsl_scope
- {
-     /* Item entry for hlsl_ctx.scopes. */
-@@ -965,10 +1060,11 @@ struct hlsl_ctx
-     struct hlsl_scope *dummy_scope;
-     /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */
-     struct list scopes;
--    /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields.
--     * This exists as a convenience because it is often necessary to iterate all extern variables
--     *   and these can be declared in global scope, as function parameters, or as the function
--     *   return value. */
-+    /* List of all the extern variables, excluding semantic variables; linked
-+     * by the hlsl_ir_var.extern_entry fields. This exists as a convenience
-+     * because it is often necessary to iterate all extern variables declared
-+     * in the global scope or as function parameters. */
-     struct list extern_vars;
-     /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared
-@@ -1003,8 +1099,12 @@ struct hlsl_ctx
-         struct hlsl_type *string;
-         struct hlsl_type *Void;
-         struct hlsl_type *null;
-+        struct hlsl_type *error;
-     } builtin_types;
-+    /* Pre-allocated "error" expression. */
-+    struct hlsl_ir_node *error_instr;
-     /* List of the instruction nodes for initializing static variables. */
-     struct hlsl_block static_initializers;
-@@ -1016,19 +1116,23 @@ struct hlsl_ctx
-         {
-             uint32_t index;
-             struct hlsl_vec4 value;
-+            struct vkd3d_shader_location loc;
-         } *regs;
-         size_t count, size;
-     } constant_defs;
-     /* 'c' registers where the constants expected by SM2 sincos are stored. */
-     struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
--    /* Number of temp. registers required for the shader to run, i.e. the largest temp register
--     *   index that will be used in the output bytecode (+1). */
--    uint32_t temp_count;
-     /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in
-      *   compute shader profiles. It is set using the numthreads() attribute in the entry point. */
-     uint32_t thread_count[3];
-+    enum vkd3d_tessellator_domain domain;
-+    unsigned int output_control_point_count;
-+    enum vkd3d_shader_tessellator_output_primitive output_primitive;
-+    enum vkd3d_shader_tessellator_partitioning partitioning;
-+    struct hlsl_ir_function_decl *patch_constant_func;
-     /* In some cases we generate opcodes by parsing an HLSL function and then
-      * invoking it. If not NULL, this field is the name of the function that we
-      * are currently parsing, "mangled" with an internal prefix to avoid
-@@ -1044,6 +1148,7 @@ struct hlsl_ctx
-     bool child_effect;
-     bool include_empty_buffers;
-     bool warn_implicit_truncation;
-+    bool double_as_float_alias;
- };
- static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor)
-@@ -1149,25 +1254,46 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n
-     return CONTAINING_RECORD(node, struct hlsl_ir_switch, node);
- }
-+static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node)
-+    VKD3D_ASSERT(node->type == HLSL_IR_COMPILE);
-+    return CONTAINING_RECORD(node, struct hlsl_ir_compile, node);
-+static inline struct hlsl_ir_sampler_state *hlsl_ir_sampler_state(const struct hlsl_ir_node *node)
-+    VKD3D_ASSERT(node->type == HLSL_IR_SAMPLER_STATE);
-+    return CONTAINING_RECORD(node, struct hlsl_ir_sampler_state, node);
- static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node)
- {
-     return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node);
- }
-+static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node)
-+    return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node);
- static inline void hlsl_block_init(struct hlsl_block *block)
- {
-     list_init(&block->instrs);
-+    block->value = NULL;
- }
- static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr)
- {
-     list_add_tail(&block->instrs, &instr->entry);
-+    block->value = (instr->data_type ? instr : NULL);
- }
- static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add)
- {
-     list_move_tail(&block->instrs, &add->instrs);
-+    block->value = add->value;
- }
- static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node)
-@@ -1283,6 +1409,7 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim)
-     {
-         case HLSL_SAMPLER_DIM_1D:
-             return 1;
-         case HLSL_SAMPLER_DIM_1DARRAY:
-@@ -1330,12 +1457,15 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const
- void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);
- void hlsl_dump_var_default_values(const struct hlsl_ir_var *var);
-+bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block,
-+        struct hlsl_state_block_entry *entry);
- bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry,
-         const struct vkd3d_shader_location *loc);
- struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
--        struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index,
--        unsigned int lhs_index, unsigned int arg_index);
-+        const struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index,
-+        unsigned int lhs_index, bool single_arg, unsigned int arg_index);
-+void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body);
- void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body);
- int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
-         enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
-@@ -1402,6 +1532,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
- struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
- struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
-         enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
-+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx,
-+        enum hlsl_so_object_type so_type, struct hlsl_type *type);
- struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
-         struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
-@@ -1428,6 +1560,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index);
- bool hlsl_index_is_resource_access(struct hlsl_ir_index *index);
- bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index);
-+struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type,
-+        const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count,
-+        struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc);
- struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val,
-         struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
- struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
-@@ -1440,6 +1575,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
-         struct hlsl_struct_field *fields, size_t field_count);
- struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components,
-         struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
-+struct hlsl_ir_node *hlsl_new_sampler_state(struct hlsl_ctx *ctx,
-+        const struct hlsl_state_block *state_block, struct vkd3d_shader_location *loc);
- struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name,
-         struct vkd3d_shader_location *loc);
- struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str,
-@@ -1466,6 +1603,9 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned
- struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector,
-         struct list *cases, const struct vkd3d_shader_location *loc);
-+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx,
-+        struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc);
- void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
-         enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
- void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
-@@ -1493,6 +1633,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type);
- unsigned int hlsl_type_major_size(const struct hlsl_type *type);
- unsigned int hlsl_type_element_count(const struct hlsl_type *type);
- bool hlsl_type_is_resource(const struct hlsl_type *type);
-+bool hlsl_type_is_shader(const struct hlsl_type *type);
- unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset);
- bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
-@@ -1525,22 +1666,18 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx,
- D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type);
- D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type);
--bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name,
--        unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg);
--bool hlsl_sm1_usage_from_semantic(const char *semantic_name,
--        uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx);
- void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer);
- int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
-         const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab,
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
-+int tpf_compile(struct vsir_program *program, uint64_t config_flags,
-         struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context,
-         struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
--bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx,
--        const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage);
--bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
--        bool output, enum vkd3d_shader_register_type *type, bool *has_idx);
--int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out);
-+enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type,
-+        unsigned int storage_modifiers);
- struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl);
-diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
-index 0c02b27817e..31fb30521e9 100644
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l
-+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
-@@ -74,13 +74,16 @@ ANY                     (.)
- BlendState              {return KW_BLENDSTATE;          }
- break                   {return KW_BREAK;               }
- Buffer                  {return KW_BUFFER;              }
-+ByteAddressBuffer       {return KW_BYTEADDRESSBUFFER;   }
- case                    {return KW_CASE;                }
- cbuffer                 {return KW_CBUFFER;             }
- centroid                {return KW_CENTROID;            }
- column_major            {return KW_COLUMN_MAJOR;        }
- ComputeShader           {return KW_COMPUTESHADER;       }
- compile                 {return KW_COMPILE;             }
-+CompileShader           {return KW_COMPILESHADER;       }
- const                   {return KW_CONST;               }
-+ConstructGSWithSO       {return KW_CONSTRUCTGSWITHSO;   }
- continue                {return KW_CONTINUE;            }
- DepthStencilState       {return KW_DEPTHSTENCILSTATE;   }
- DepthStencilView        {return KW_DEPTHSTENCILVIEW;    }
-@@ -88,7 +91,6 @@ default                 {return KW_DEFAULT;             }
- discard                 {return KW_DISCARD;             }
- DomainShader            {return KW_DOMAINSHADER;        }
- do                      {return KW_DO;                  }
--double                  {return KW_DOUBLE;              }
- else                    {return KW_ELSE;                }
- export                  {return KW_EXPORT;              }
- extern                  {return KW_EXTERN;              }
-@@ -102,6 +104,7 @@ if                      {return KW_IF;                  }
- in                      {return KW_IN;                  }
- inline                  {return KW_INLINE;              }
- inout                   {return KW_INOUT;               }
-+LineStream              {return KW_LINESTREAM;          }
- linear                  {return KW_LINEAR;              }
- matrix                  {return KW_MATRIX;              }
- namespace               {return KW_NAMESPACE;           }
-@@ -112,6 +115,7 @@ out                     {return KW_OUT;                 }
- packoffset              {return KW_PACKOFFSET;          }
- pass                    {return KW_PASS;                }
- PixelShader             {return KW_PIXELSHADER;         }
-+PointStream             {return KW_POINTSTREAM;         }
- pixelshader             {return KW_PIXELSHADER;         }
- RasterizerOrderedBuffer           {return KW_RASTERIZERORDEREDBUFFER;            }
- RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER;  }
-@@ -126,6 +130,7 @@ RenderTargetView        {return KW_RENDERTARGETVIEW;    }
- return                  {return KW_RETURN;              }
- row_major               {return KW_ROW_MAJOR;           }
- RWBuffer                {return KW_RWBUFFER;            }
-+RWByteAddressBuffer     {return KW_RWBYTEADDRESSBUFFER; }
- RWStructuredBuffer      {return KW_RWSTRUCTUREDBUFFER;  }
- RWTexture1D             {return KW_RWTEXTURE1D;         }
- RWTexture1DArray        {return KW_RWTEXTURE1DARRAY;    }
-@@ -141,6 +146,7 @@ samplerCUBE             {return KW_SAMPLERCUBE;         }
- SamplerState            {return KW_SAMPLER;             }
- sampler_state           {return KW_SAMPLER_STATE;       }
- shared                  {return KW_SHARED;              }
-+snorm                   {return KW_SNORM;               }
- stateblock              {return KW_STATEBLOCK;          }
- stateblock_state        {return KW_STATEBLOCK_STATE;    }
- static                  {return KW_STATIC;              }
-@@ -166,10 +172,12 @@ texture3D               {return KW_TEXTURE3D;           }
- TextureCube             {return KW_TEXTURECUBE;         }
- textureCUBE             {return KW_TEXTURECUBE;         }
- TextureCubeArray        {return KW_TEXTURECUBEARRAY;    }
-+TriangleStream          {return KW_TRIANGLESTREAM;      }
- true                    {return KW_TRUE;                }
- typedef                 {return KW_TYPEDEF;             }
- unsigned                {return KW_UNSIGNED;            }
- uniform                 {return KW_UNIFORM;             }
-+unorm                   {return KW_UNORM;               }
- vector                  {return KW_VECTOR;              }
- VertexShader            {return KW_VERTEXSHADER;        }
- vertexshader            {return KW_VERTEXSHADER;        }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
-index 3f319dea0d8..03a2f38e4e9 100644
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
-+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
-@@ -40,6 +40,7 @@ struct parse_initializer
-     unsigned int args_count;
-     struct hlsl_block *instrs;
-     bool braces;
-+    struct vkd3d_shader_location loc;
- };
- struct parse_parameter
-@@ -52,7 +53,7 @@ struct parse_parameter
-     struct parse_initializer initializer;
- };
--struct parse_colon_attribute
-+struct parse_colon_attributes
- {
-     struct hlsl_semantic semantic;
-     struct hlsl_reg_reservation reg_reservation;
-@@ -147,7 +148,7 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha
- static struct hlsl_ir_node *node_from_block(struct hlsl_block *block)
- {
--    return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry);
-+    return block->value;
- }
- static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx)
-@@ -331,6 +332,9 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node
- {
-     const struct hlsl_type *type = cond->data_type;
-+    if (type->class == HLSL_CLASS_ERROR)
-+        return;
-     if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1)
-     {
-         struct vkd3d_string_buffer *string;
-@@ -437,6 +441,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct
-     if (hlsl_types_are_equal(src_type, dst_type))
-         return node;
-+    if (node->type == HLSL_IR_SAMPLER_STATE && dst_type->class == HLSL_CLASS_SAMPLER)
-+        return node;
-     if (!implicit_compatible_data_types(ctx, src_type, dst_type))
-     {
-         struct vkd3d_string_buffer *src_string, *dst_string;
-@@ -458,6 +465,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct
-     return add_cast(ctx, block, node, dst_type, loc);
- }
-+static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block,
-+        struct hlsl_type *dst_type, const struct parse_array_sizes *arrays, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *instr = node_from_block(block);
-+    struct hlsl_type *src_type = instr->data_type;
-+    unsigned int i;
-+    for (i = 0; i < arrays->count; ++i)
-+    {
-+        if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT)
-+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts.");
-+        dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]);
-+    }
-+    if (instr->data_type->class == HLSL_CLASS_ERROR)
-+        return true;
-+    if (!explicit_compatible_data_types(ctx, src_type, dst_type))
-+    {
-+        struct vkd3d_string_buffer *src_string, *dst_string;
-+        src_string = hlsl_type_to_string(ctx, src_type);
-+        dst_string = hlsl_type_to_string(ctx, dst_type);
-+        if (src_string && dst_string)
-+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.",
-+                    src_string->buffer, dst_string->buffer);
-+        hlsl_release_string_buffer(ctx, src_string);
-+        hlsl_release_string_buffer(ctx, dst_string);
-+        return false;
-+    }
-+    return add_cast(ctx, block, instr, dst_type, loc);
- static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod,
-         const struct vkd3d_shader_location *loc)
- {
-@@ -489,9 +530,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co
-     check_condition_type(ctx, condition);
-     bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL);
--    if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc)))
-+    /* We already checked for a 1-component numeric type, so
-+     * add_implicit_conversion() is equivalent to add_cast() here. */
-+    if (!(cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc)))
-         return false;
--    hlsl_block_add_instr(cond_block, cast);
-     if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc)))
-         return false;
-@@ -516,7 +558,7 @@ enum loop_type
- };
--static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs)
-+static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs)
- {
-     unsigned int i, j;
-@@ -525,11 +567,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att
-         for (j = i + 1; j < attrs->count; ++j)
-         {
-             if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name))
--                 return true;
-+                hlsl_error(ctx, &attrs->attrs[j]->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
-+                        "Found duplicate attribute \"%s\".", attrs->attrs[j]->name);
-         }
-     }
--    return false;
- }
- static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type,
-@@ -606,12 +647,17 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
-     struct hlsl_block expr;
-     struct hlsl_src src;
-+    if (node_from_block(block)->data_type->class == HLSL_CLASS_ERROR)
-+        return ret;
-     LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
-     {
-         switch (node->type)
-         {
-+            case HLSL_IR_COMPILE:
-             case HLSL_IR_CONSTANT:
-             case HLSL_IR_EXPR:
-+            case HLSL_IR_SAMPLER_STATE:
-             case HLSL_IR_STRING_CONSTANT:
-             case HLSL_IR_SWIZZLE:
-             case HLSL_IR_LOAD:
-@@ -632,6 +678,8 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
-                 hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
-                         "Expected literal expression.");
-                 break;
-+            case HLSL_IR_VSIR_INSTRUCTION_REF:
-+                vkd3d_unreachable();
-         }
-     }
-@@ -639,14 +687,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
-         return ret;
-     hlsl_block_add_block(&expr, block);
--    if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc))
-+    if (!(node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)))
-     {
-         hlsl_block_cleanup(&expr);
-         return ret;
-     }
-     /* Wrap the node into a src to allow the reference to survive the multiple const passes. */
--    hlsl_src_from_node(&src, node_from_block(&expr));
-+    hlsl_src_from_node(&src, node);
-+    hlsl_lower_index_loads(ctx, &expr);
-     hlsl_run_const_passes(ctx, &expr);
-     node = src.node;
-     hlsl_src_remove(&src);
-@@ -697,9 +746,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
-     unsigned int i, unroll_limit = 0;
-     struct hlsl_ir_node *loop;
--    if (attribute_list_has_duplicates(attributes))
--        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute.");
-+    check_attribute_list_for_duplicates(ctx, attributes);
-     check_loop_attributes(ctx, attributes, loc);
-     /* Ignore unroll(0) attribute, and any invalid attribute. */
-@@ -897,6 +944,9 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block,
-         {
-             struct hlsl_ir_node *store;
-+            if (return_value->data_type->class == HLSL_CLASS_ERROR)
-+                return true;
-             if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc)))
-                 return false;
-@@ -974,6 +1024,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str
-     const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type;
-     struct hlsl_ir_node *return_index, *cast;
-+    if (array->data_type->class == HLSL_CLASS_ERROR || index->data_type->class == HLSL_CLASS_ERROR)
-+    {
-+        block->value = ctx->error_instr;
-+        return true;
-+    }
-     if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV)
-             && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)
-     {
-@@ -1164,6 +1220,33 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields,
-     return true;
- }
-+static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
-+        const char *name, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *record = node_from_block(block);
-+    const struct hlsl_type *type = record->data_type;
-+    const struct hlsl_struct_field *field, *base;
-+    if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name)))
-+    {
-+        unsigned int field_idx = field - type->e.record.fields;
-+        return add_record_access(ctx, block, record, field_idx, loc);
-+    }
-+    else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super")))
-+    {
-+        unsigned int base_idx = base - type->e.record.fields;
-+        if (!add_record_access(ctx, block, record, base_idx, loc))
-+            return false;
-+        return add_record_access_recurse(ctx, block, name, loc);
-+    }
-+    hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name);
-+    block->value = ctx->error_instr;
-+    return true;
- static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list)
- {
-     struct parse_variable_def *v, *v_next;
-@@ -1227,7 +1310,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type,
- }
- static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs,
--        struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src);
-+        struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src,
-+        bool is_default_values_initializer);
- static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters,
-         struct parse_parameter *param, const struct vkd3d_shader_location *loc)
-@@ -1285,7 +1369,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters
-         for (i = 0; i < param->initializer.args_count; ++i)
-         {
--            initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]);
-+            initialize_var_components(ctx, param->initializer.instrs, var,
-+                    &store_index, param->initializer.args[i], true);
-         }
-         free_parse_initializer(&param->initializer);
-@@ -1673,25 +1758,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl
-     return expr;
- }
--static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
-+static bool type_is_integer(enum hlsl_base_type type)
- {
--    const struct hlsl_type *type = instr->data_type;
--    struct vkd3d_string_buffer *string;
--    switch (type->e.numeric.type)
-+    switch (type)
-     {
-         case HLSL_TYPE_BOOL:
-         case HLSL_TYPE_INT:
-         case HLSL_TYPE_UINT:
--            break;
-+            return true;
--        default:
--            if ((string = hlsl_type_to_string(ctx, type)))
--                hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
--                        "Expression type '%s' is not integer.", string->buffer);
--            hlsl_release_string_buffer(ctx, string);
--            break;
-+        case HLSL_TYPE_DOUBLE:
-+        case HLSL_TYPE_FLOAT:
-+        case HLSL_TYPE_HALF:
-+            return false;
-     }
-+    vkd3d_unreachable();
-+static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
-+    const struct hlsl_type *type = instr->data_type;
-+    struct vkd3d_string_buffer *string;
-+    if (type_is_integer(type->e.numeric.type))
-+        return;
-+    if ((string = hlsl_type_to_string(ctx, type)))
-+        hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                "Expression type '%s' is not integer.", string->buffer);
-+    hlsl_release_string_buffer(ctx, string);
- }
- static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
-@@ -1699,12 +1795,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru
- {
-     struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg};
-+    if (arg->data_type->class == HLSL_CLASS_ERROR)
-+        return arg;
-     return add_expr(ctx, block, op, args, arg->data_type, loc);
- }
- static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
-         enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc)
- {
-+    if (arg->data_type->class == HLSL_CLASS_ERROR)
-+        return arg;
-     check_integer_type(ctx, arg);
-     return add_unary_arithmetic_expr(ctx, block, op, arg, loc);
-@@ -1716,6 +1818,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct
-     struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
-     struct hlsl_type *bool_type;
-+    if (arg->data_type->class == HLSL_CLASS_ERROR)
-+        return arg;
-     bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL,
-             arg->data_type->dimx, arg->data_type->dimy);
-@@ -1745,7 +1850,11 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str
-     struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
-     struct hlsl_type *common_type;
--    common_type = get_common_numeric_type(ctx, arg1, arg2, loc);
-+    if (!(common_type = get_common_numeric_type(ctx, arg1, arg2, loc)))
-+    {
-+        block->value = ctx->error_instr;
-+        return block->value;
-+    }
-     if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc)))
-         return NULL;
-@@ -1942,6 +2051,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls
-     hlsl_block_add_block(block1, block2);
-     destroy_block(block2);
-+    if (arg1->data_type->class == HLSL_CLASS_ERROR || arg2->data_type->class == HLSL_CLASS_ERROR)
-+    {
-+        block1->value = ctx->error_instr;
-+        return block1;
-+    }
-     if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL)
-         return NULL;
-@@ -2048,18 +2163,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un
-     return true;
- }
--static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs,
-+static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs,
-         enum parse_assign_op assign_op, struct hlsl_ir_node *rhs)
- {
-     struct hlsl_type *lhs_type = lhs->data_type;
--    struct hlsl_ir_node *copy;
-     unsigned int writemask = 0, width = 0;
-     bool matrix_writemask = false;
-+    if (lhs->data_type->class == HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR)
-+    {
-+        block->value = ctx->error_instr;
-+        return true;
-+    }
-     if (assign_op == ASSIGN_OP_SUB)
-     {
-         if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc)))
--            return NULL;
-+            return false;
-         assign_op = ASSIGN_OP_ADD;
-     }
-     if (assign_op != ASSIGN_OP_ASSIGN)
-@@ -2068,7 +2188,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-         VKD3D_ASSERT(op);
-         if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc)))
--            return NULL;
-+            return false;
-     }
-     if (hlsl_is_numeric_type(lhs_type))
-@@ -2078,14 +2198,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-     }
-     if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc)))
--        return NULL;
-+        return false;
-     while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX)
-     {
-         if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST)
-         {
-             hlsl_fixme(ctx, &lhs->loc, "Cast on the LHS.");
--            return NULL;
-+            return false;
-         }
-         else if (lhs->type == HLSL_IR_SWIZZLE)
-         {
-@@ -2100,25 +2220,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-                 if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX)
-                 {
-                     hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle.");
--                    return NULL;
-+                    return false;
-                 }
-                 if (!invert_swizzle_matrix(&s, &writemask, &width))
-                 {
-                     hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix.");
--                    return NULL;
-+                    return false;
-                 }
-                 matrix_writemask = true;
-             }
-             else if (!invert_swizzle(&s, &writemask, &width))
-             {
-                 hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask.");
--                return NULL;
-+                return false;
-             }
-             if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc)))
--            {
--                return NULL;
--            }
-+                return false;
-             hlsl_block_add_instr(block, new_swizzle);
-             lhs = swizzle->val.node;
-@@ -2127,7 +2245,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-         else
-         {
-             hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue.");
--            return NULL;
-+            return false;
-         }
-     }
-@@ -2142,11 +2260,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-         if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs)))
-         {
-             hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource store.");
--            return NULL;
-+            return false;
-         }
-         if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node))
--            return NULL;
-+            return false;
-         resource_type = hlsl_deref_get_type(ctx, &resource_deref);
-         VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV);
-@@ -2168,7 +2286,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-         if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc)))
-         {
-             hlsl_cleanup_deref(&resource_deref);
--            return NULL;
-+            return false;
-         }
-         hlsl_block_add_instr(block, store);
-         hlsl_cleanup_deref(&resource_deref);
-@@ -2195,13 +2313,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-                 if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc)))
-                 {
-                     hlsl_cleanup_deref(&deref);
--                    return NULL;
-+                    return false;
-                 }
-                 if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load))
-                 {
-                     hlsl_cleanup_deref(&deref);
--                    return NULL;
-+                    return false;
-                 }
-                 hlsl_block_add_block(block, &store_block);
-             }
-@@ -2226,23 +2344,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-                 continue;
-             if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc)))
--                return NULL;
-+                return false;
-             hlsl_block_add_instr(block, c);
-             if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc)))
--                return NULL;
-+                return false;
-             hlsl_block_add_instr(block, cell);
-             if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc)))
--                return NULL;
-+                return false;
-             if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell))
--                return NULL;
-+                return false;
-             if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc)))
-             {
-                 hlsl_cleanup_deref(&deref);
--                return NULL;
-+                return false;
-             }
-             hlsl_block_add_instr(block, store);
-             hlsl_cleanup_deref(&deref);
-@@ -2254,24 +2372,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
-         struct hlsl_deref deref;
-         if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs))
--            return NULL;
-+            return false;
-         if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc)))
-         {
-             hlsl_cleanup_deref(&deref);
--            return NULL;
-+            return false;
-         }
-         hlsl_block_add_instr(block, store);
-         hlsl_cleanup_deref(&deref);
-     }
--    /* Don't use the instruction itself as a source, as this makes structure
--     * splitting easier. Instead copy it here. Since we retrieve sources from
--     * the last instruction in the list, we do need to copy. */
--    if (!(copy = hlsl_new_copy(ctx, rhs)))
--        return NULL;
--    hlsl_block_add_instr(block, copy);
--    return copy;
-+    block->value = rhs;
-+    return true;
- }
- static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post,
-@@ -2280,6 +2393,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d
-     struct hlsl_ir_node *lhs = node_from_block(block);
-     struct hlsl_ir_node *one;
-+    if (lhs->data_type->class == HLSL_CLASS_ERROR)
-+        return true;
-     if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST)
-         hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST,
-                 "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in");
-@@ -2307,57 +2423,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d
-     return true;
- }
--/* For some reason, for matrices, values from default value initializers end up in different
-- * components than from regular initializers. Default value initializers fill the matrix in
-- * vertical reading order (left-to-right top-to-bottom) instead of regular reading order
-- * (top-to-bottom left-to-right), so they have to be adjusted.
-- * An exception is that the order of matrix initializers for function parameters are row-major
-- * (top-to-bottom left-to-right). */
--static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx,
--        struct hlsl_type *type, unsigned int index)
--    unsigned int element_comp_count, element, x, y, i;
--    unsigned int base = 0;
--    if (ctx->profile->major_version < 4)
--        return index;
--    if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT)
--        return index;
--    switch (type->class)
--    {
--        case HLSL_CLASS_MATRIX:
--            x = index / type->dimy;
--            y = index % type->dimy;
--            return y * type->dimx + x;
--        case HLSL_CLASS_ARRAY:
--            element_comp_count = hlsl_type_component_count(type->e.array.type);
--            element = index / element_comp_count;
--            base = element * element_comp_count;
--            return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base);
--        case HLSL_CLASS_STRUCT:
--            for (i = 0; i < type->e.record.field_count; ++i)
--            {
--                struct hlsl_type *field_type = type->e.record.fields[i].type;
--                element_comp_count = hlsl_type_component_count(field_type);
--                if (index - base < element_comp_count)
--                    return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base);
--                base += element_comp_count;
--            }
--            break;
--        default:
--            return index;
--    }
--    vkd3d_unreachable();
- static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs,
--        struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src)
-+        struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src,
-+        bool is_default_values_initializer)
- {
-     unsigned int src_comp_count = hlsl_type_component_count(src->data_type);
-     struct hlsl_deref dst_deref;
-@@ -2376,38 +2444,107 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i
-         dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index);
--        if (dst->default_values)
-+        if (is_default_values_initializer)
-         {
-             struct hlsl_default_value default_value = {0};
--            unsigned int dst_index;
--            if (!hlsl_clone_block(ctx, &block, instrs))
--                return;
--            default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc);
--            if (dst->is_param)
--                dst_index = *store_index;
-+            if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE)
-+            {
-+                if (hlsl_is_numeric_type(dst_comp_type))
-+                {
-+                    /* Default values are discarded if they contain an object
-+                     * literal expression for a numeric component. */
-+                    if (dst->default_values)
-+                    {
-+                        hlsl_warning(ctx, &src->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE,
-+                                "Component %u in variable '%s' initializer is object literal. Default values discarded.",
-+                                k, dst->name);
-+                        vkd3d_free(dst->default_values);
-+                        dst->default_values = NULL;
-+                    }
-+                }
-+            }
-             else
--                dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index);
-+            {
-+                if (!hlsl_clone_block(ctx, &block, instrs))
-+                    return;
-+                default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc);
--            dst->default_values[dst_index] = default_value;
-+                if (dst->default_values)
-+                    dst->default_values[*store_index] = default_value;
--            hlsl_block_cleanup(&block);
-+                hlsl_block_cleanup(&block);
-+            }
-         }
-         else
-         {
--            if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc)))
--                return;
-+            if (src->type == HLSL_IR_SAMPLER_STATE)
-+            {
-+                /* Sampler states end up in the variable's state_blocks instead of
-+                 * being used to initialize its value. */
-+                struct hlsl_ir_sampler_state *sampler_state = hlsl_ir_sampler_state(src);
-+                if (dst_comp_type->class != HLSL_CLASS_SAMPLER)
-+                {
-+                    struct vkd3d_string_buffer *dst_string;
-+                    dst_string = hlsl_type_to_string(ctx, dst_comp_type);
-+                    if (dst_string)
-+                        hlsl_error(ctx, &src->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                                "Cannot assign sampler_state to %s.", dst_string->buffer);
-+                    hlsl_release_string_buffer(ctx, dst_string);
-+                    return;
-+                }
-+                if (!hlsl_array_reserve(ctx, (void **)&dst->state_blocks, &dst->state_block_capacity,
-+                        dst->state_block_count + 1, sizeof(*dst->state_blocks)))
-+                    return;
-+                dst->state_blocks[dst->state_block_count] = sampler_state->state_block;
-+                sampler_state->state_block = NULL;
-+                ++dst->state_block_count;
-+            }
-+            else
-+            {
-+                if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc)))
-+                    return;
--            if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))
--                return;
--            hlsl_block_add_block(instrs, &block);
-+                if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))
-+                    return;
-+                hlsl_block_add_block(instrs, &block);
-+            }
-         }
-         ++*store_index;
-     }
- }
-+static void initialize_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *dst,
-+        const struct parse_initializer *initializer, bool is_default_values_initializer)
-+    unsigned int store_index = 0;
-+    /* If any of the elements has an error type, then initializer_size() is not
-+     * meaningful. */
-+    for (unsigned int i = 0; i < initializer->args_count; ++i)
-+    {
-+        if (initializer->args[i]->data_type->class == HLSL_CLASS_ERROR)
-+            return;
-+    }
-+    if (initializer_size(initializer) != hlsl_type_component_count(dst->data_type))
-+    {
-+        hlsl_error(ctx, &initializer->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Expected %u components in initializer, but got %u.",
-+                hlsl_type_component_count(dst->data_type), initializer_size(initializer));
-+        return;
-+    }
-+    for (unsigned int i = 0; i < initializer->args_count; ++i)
-+        initialize_var_components(ctx, initializer->instrs, dst, &store_index,
-+                initializer->args[i], is_default_values_initializer);
- static bool type_has_object_components(const struct hlsl_type *type)
- {
-     if (type->class == HLSL_CLASS_ARRAY)
-@@ -2733,13 +2870,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var
-         if (v->initializer.args_count)
-         {
--            unsigned int store_index = 0;
-             bool is_default_values_initializer;
--            unsigned int size, k;
-             is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer)
-                     || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)
-                     || ctx->cur_scope->annotations;
-+            if (hlsl_get_multiarray_element_type(type)->class == HLSL_CLASS_SAMPLER)
-+                is_default_values_initializer = false;
-+            if (hlsl_type_is_shader(type))
-+                is_default_values_initializer = false;
-             if (is_default_values_initializer)
-             {
-@@ -2769,19 +2908,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var
-                 v->initializer.args[0] = node_from_block(v->initializer.instrs);
-             }
--            size = initializer_size(&v->initializer);
--            if (component_count != size)
--            {
--                hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
--                        "Expected %u components in initializer, but got %u.", component_count, size);
--                free_parse_variable_def(v);
--                continue;
--            }
--            for (k = 0; k < v->initializer.args_count; ++k)
--            {
--                initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]);
--            }
-+            initialize_var(ctx, var, &v->initializer, is_default_values_initializer);
-             if (is_default_values_initializer)
-             {
-@@ -2795,6 +2922,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var
-             {
-                 hlsl_block_add_block(initializers, v->initializer.instrs);
-             }
-+            if (var->state_blocks)
-+                TRACE("Variable %s has %u state blocks.\n", var->name, var->state_block_count);
-         }
-         else if (var->storage_modifiers & HLSL_STORAGE_STATIC)
-         {
-@@ -2835,28 +2965,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var
-     return initializers;
- }
--static bool func_is_compatible_match(struct hlsl_ctx *ctx,
--        const struct hlsl_ir_function_decl *decl, const struct parse_initializer *args)
-+static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *decl,
-+        bool is_compile, const struct parse_initializer *args)
- {
--    unsigned int i;
--    if (decl->parameters.count < args->args_count)
--        return false;
-+    unsigned int i, k;
--    for (i = 0; i < args->args_count; ++i)
-+    k = 0;
-+    for (i = 0; i < decl->parameters.count; ++i)
-     {
--        if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type))
-+        if (is_compile && !(decl->parameters.vars[i]->storage_modifiers & HLSL_STORAGE_UNIFORM))
-+            continue;
-+        if (k >= args->args_count)
-+        {
-+            if (!decl->parameters.vars[i]->default_values)
-+                return false;
-+            return true;
-+        }
-+        if (!implicit_compatible_data_types(ctx, args->args[k]->data_type, decl->parameters.vars[i]->data_type))
-             return false;
--    }
--    if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values)
-+        ++k;
-+    }
-+    if (k < args->args_count)
-         return false;
-     return true;
- }
- static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx,
--        const char *name, const struct parse_initializer *args,
-+        const char *name, const struct parse_initializer *args, bool is_compile,
-         const struct vkd3d_shader_location *loc)
- {
-     struct hlsl_ir_function_decl *decl, *compatible_match = NULL;
-@@ -2869,7 +3007,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx,
-     LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry)
-     {
--        if (func_is_compatible_match(ctx, decl, args))
-+        if (func_is_compatible_match(ctx, decl, is_compile, args))
-         {
-             if (compatible_match)
-             {
-@@ -2890,26 +3028,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc
-     return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc);
- }
--static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
--        const struct parse_initializer *args, const struct vkd3d_shader_location *loc)
-+static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx,
-+        struct hlsl_ir_function_decl *func, const struct parse_initializer *args,
-+        bool is_compile, const struct vkd3d_shader_location *loc)
- {
-     struct hlsl_ir_node *call;
--    unsigned int i, j;
-+    unsigned int i, j, k;
-     VKD3D_ASSERT(args->args_count <= func->parameters.count);
--    for (i = 0; i < args->args_count; ++i)
-+    k = 0;
-+    for (i = 0; i < func->parameters.count; ++i)
-     {
-         struct hlsl_ir_var *param = func->parameters.vars[i];
--        struct hlsl_ir_node *arg = args->args[i];
-+        struct hlsl_ir_node *arg;
-+        if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM))
-+            continue;
-+        if (k >= args->args_count)
-+            break;
-+        arg = args->args[k];
-         if (!hlsl_types_are_equal(arg->data_type, param->data_type))
-         {
-             struct hlsl_ir_node *cast;
-             if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc)))
--                return false;
--            args->args[i] = cast;
-+                return NULL;
-+            args->args[k] = cast;
-             arg = cast;
-         }
-@@ -2918,13 +3065,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu
-             struct hlsl_ir_node *store;
-             if (!(store = hlsl_new_simple_store(ctx, param, arg)))
--                return false;
-+                return NULL;
-             hlsl_block_add_instr(args->instrs, store);
-         }
-+        ++k;
-     }
-     /* Add default values for the remaining parameters. */
--    for (i = args->args_count; i < func->parameters.count; ++i)
-+    for (; i < func->parameters.count; ++i)
-     {
-         struct hlsl_ir_var *param = func->parameters.vars[i];
-         unsigned int comp_count = hlsl_type_component_count(param->data_type);
-@@ -2932,6 +3081,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu
-         VKD3D_ASSERT(param->default_values);
-+        if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM))
-+            continue;
-         hlsl_init_simple_deref_from_var(&param_deref, param);
-         for (j = 0; j < comp_count; ++j)
-@@ -2945,20 +3097,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu
-             {
-                 value.u[0] = param->default_values[j].number;
-                 if (!(comp = hlsl_new_constant(ctx, type, &value, loc)))
--                    return false;
-+                    return NULL;
-                 hlsl_block_add_instr(args->instrs, comp);
-                 if (!hlsl_new_store_component(ctx, &store_block, &param_deref, j, comp))
--                    return false;
-+                    return NULL;
-                 hlsl_block_add_block(args->instrs, &store_block);
-             }
-         }
-     }
-     if (!(call = hlsl_new_call(ctx, func, loc)))
--        return false;
-+        return NULL;
-     hlsl_block_add_instr(args->instrs, call);
-+    if (is_compile)
-+        return call;
-     for (i = 0; i < args->args_count; ++i)
-     {
-         struct hlsl_ir_var *param = func->parameters.vars[i];
-@@ -2973,11 +3128,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu
-                         "Output argument to \"%s\" is const.", func->func->name);
-             if (!(load = hlsl_new_var_load(ctx, param, &arg->loc)))
--                return false;
-+                return NULL;
-             hlsl_block_add_instr(args->instrs, &load->node);
-             if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node))
--                return false;
-+                return NULL;
-         }
-     }
-@@ -2998,7 +3153,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu
-         hlsl_block_add_instr(args->instrs, expr);
-     }
--    return true;
-+    return call;
- }
- static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx,
-@@ -3006,7 +3161,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx,
- {
-     struct hlsl_type *type = arg->data_type;
--    if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF)
-+    if (!type_is_integer(type->e.numeric.type))
-         return arg;
-     type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-@@ -3094,14 +3249,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx,
- static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
--    enum hlsl_base_type base_type;
-     struct hlsl_type *type;
-     if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
-         return false;
--    base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT;
--    type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy);
-+    if (type_is_integer(type->e.numeric.type))
-+        type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-     return convert_args(ctx, params, type, loc);
- }
-@@ -3129,6 +3282,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode)
- {
-     struct hlsl_ir_function_decl *func;
-+    struct hlsl_ir_node *arg;
-     struct hlsl_type *type;
-     char *body;
-@@ -3152,8 +3306,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx,
-     const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos;
--    type = params->args[0]->data_type;
--    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-+    if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc)))
-+        return false;
-+    type = arg->data_type;
-     if (!(body = hlsl_sprintf_alloc(ctx, template,
-             type->name, fn_name, type->name,
-@@ -3165,7 +3320,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_acos(struct hlsl_ctx *ctx,
-@@ -3282,9 +3437,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx,
-             "        : poly_approx;\n"
-             "}";
--    if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-         return false;
--    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-+    type = params->args[0]->data_type;
-     if (!(buf = hlsl_get_string_buffer(ctx)))
-         return false;
-@@ -3314,7 +3469,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_atan(struct hlsl_ctx *ctx,
-@@ -3507,7 +3662,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_cosh(struct hlsl_ctx *ctx,
-@@ -3525,9 +3680,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
-     struct hlsl_type *cast_type;
-     enum hlsl_base_type base;
--    if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF)
--        base = HLSL_TYPE_HALF;
--    else
-+    base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type);
-+    if (type_is_integer(base))
-         base = HLSL_TYPE_FLOAT;
-     cast_type = hlsl_get_vector_type(ctx, base, 3);
-@@ -3698,15 +3852,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx,
-         return false;
-     }
-+    if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc)))
-+        return false;
-     dim = min(type->dimx, type->dimy);
-     if (dim == 1)
--    {
--        if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc)))
--            return false;
-         return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc);
--    }
--    typename = type->e.numeric.type == HLSL_TYPE_HALF ? "half" : "float";
-+    typename = hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type)->name;
-     template = templates[dim];
-     switch (dim)
-@@ -3734,7 +3887,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_distance(struct hlsl_ctx *ctx,
-@@ -3766,6 +3919,50 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx,
-     return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc);
- }
-+static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *params,
-+        const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_function_decl *func;
-+    struct hlsl_type *type, *vec4_type;
-+    char *body;
-+    static const char template[] =
-+            "%s dst(%s i0, %s i1)\n"
-+            "{\n"
-+            /* Scalars and vector-4s are both valid inputs, so promote scalars
-+             * if necessary. */
-+            "    %s src0 = i0, src1 = i1;\n"
-+            "    return %s(1, src0.y * src1.y, src0.z, src1.w);\n"
-+            "}";
-+    if (!elementwise_intrinsic_convert_args(ctx, params, loc))
-+        return false;
-+    type = params->args[0]->data_type;
-+    if (!(type->class == HLSL_CLASS_SCALAR
-+            || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4)))
-+    {
-+        struct vkd3d_string_buffer *string;
-+        if ((string = hlsl_type_to_string(ctx, type)))
-+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                    "Wrong dimension for dst(): expected scalar or 4-dimensional vector, but got %s.",
-+                    string->buffer);
-+        hlsl_release_string_buffer(ctx, string);
-+    }
-+    vec4_type = hlsl_get_vector_type(ctx, type->e.numeric.type, 4);
-+    if (!(body = hlsl_sprintf_alloc(ctx, template,
-+            vec4_type->name, type->name, type->name,
-+            vec4_type->name,
-+            vec4_type->name)))
-+        return false;
-+    func = hlsl_compile_internal_function(ctx, "dst", body);
-+    vkd3d_free(body);
-+    if (!func)
-+        return false;
-+    return !!add_user_call(ctx, func, params, false, loc);
- static bool intrinsic_exp(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-@@ -3809,9 +4006,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx,
-             "    return dot(i, ng) < 0 ? n : -n;\n"
-             "}\n";
--    if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-         return false;
--    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-+    type = params->args[0]->data_type;
-     if (!(body = hlsl_sprintf_alloc(ctx, template,
-             type->name, type->name, type->name, type->name)))
-@@ -3821,7 +4018,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_f16tof32(struct hlsl_ctx *ctx,
-@@ -3839,6 +4036,21 @@ static bool intrinsic_f16tof32(struct hlsl_ctx *ctx,
-     return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc);
- }
-+static bool intrinsic_f32tof16(struct hlsl_ctx *ctx,
-+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
-+    struct hlsl_type *type;
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-+        return false;
-+    type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT);
-+    operands[0] = params->args[0];
-+    return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc);
- static bool intrinsic_floor(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-@@ -3926,7 +4138,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_ldexp(struct hlsl_ctx *ctx,
-@@ -4029,7 +4241,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx,
-     if (!(func = hlsl_compile_internal_function(ctx, "lit", body)))
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_log(struct hlsl_ctx *ctx,
-@@ -4081,6 +4293,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx,
-     return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc);
- }
-+static bool intrinsic_mad(struct hlsl_ctx *ctx,
-+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
-+    if (!elementwise_intrinsic_convert_args(ctx, params, loc))
-+        return false;
-+    args[0] = params->args[0];
-+    args[1] = params->args[1];
-+    args[2] = params->args[2];
-+    return add_expr(ctx, params->instrs, HLSL_OP3_MAD, args, args[0]->data_type, loc);
- static bool intrinsic_max(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-@@ -4099,6 +4325,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx,
-     return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc);
- }
-+static bool intrinsic_modf(struct hlsl_ctx *ctx,
-+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_function_decl *func;
-+    struct hlsl_type *type;
-+    char *body;
-+    static const char template[] =
-+            "%s modf(%s x, out %s ip)\n"
-+            "{\n"
-+            "    ip = trunc(x);\n"
-+            "    return x - ip;\n"
-+            "}";
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-+        return false;
-+    type = params->args[0]->data_type;
-+    if (!(body = hlsl_sprintf_alloc(ctx, template,
-+            type->name, type->name, type->name)))
-+        return false;
-+    func = hlsl_compile_internal_function(ctx, "modf", body);
-+    vkd3d_free(body);
-+    if (!func)
-+        return false;
-+    return !!add_user_call(ctx, func, params, false, loc);
- static bool intrinsic_mul(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-@@ -4285,13 +4540,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx,
- static bool intrinsic_refract(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
--    struct hlsl_type *r_type = params->args[0]->data_type;
--    struct hlsl_type *n_type = params->args[1]->data_type;
--    struct hlsl_type *i_type = params->args[2]->data_type;
--    struct hlsl_type *res_type, *idx_type, *scal_type;
--    struct parse_initializer mut_params;
-+    struct hlsl_type *type, *scalar_type;
-     struct hlsl_ir_function_decl *func;
--    enum hlsl_base_type base;
-+    struct hlsl_ir_node *index;
-     char *body;
-     static const char template[] =
-@@ -4303,28 +4554,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx,
-             "    return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n"
-             "}";
--    if (r_type->class == HLSL_CLASS_MATRIX
--            || n_type->class == HLSL_CLASS_MATRIX
--            || i_type->class == HLSL_CLASS_MATRIX)
-+    if (params->args[0]->data_type->class == HLSL_CLASS_MATRIX
-+            || params->args[1]->data_type->class == HLSL_CLASS_MATRIX
-+            || params->args[2]->data_type->class == HLSL_CLASS_MATRIX)
-     {
-         hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported.");
-         return false;
-     }
--    VKD3D_ASSERT(params->args_count == 3);
--    mut_params = *params;
--    mut_params.args_count = 2;
--    if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc)))
-+    /* This is technically not an elementwise intrinsic, but the first two
-+     * arguments are.
-+     * The third argument is a scalar, but can be passed as a vector,
-+     * which should generate an implicit truncation warning.
-+     * Cast down to scalar explicitly, then we can just use
-+     * elementwise_intrinsic_float_convert_args().
-+     * This may result in casting the scalar back to a vector,
-+     * which we will only use the first component of. */
-+    scalar_type = hlsl_get_scalar_type(ctx, params->args[2]->data_type->e.numeric.type);
-+    if (!(index = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc)))
-         return false;
-+    params->args[2] = index;
--    base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type);
--    res_type = convert_numeric_type(ctx, res_type, base);
--    idx_type = convert_numeric_type(ctx, i_type, base);
--    scal_type = hlsl_get_scalar_type(ctx, base);
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-+        return false;
-+    type = params->args[0]->data_type;
--    if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name,
--            res_type->name, idx_type->name, scal_type->name)))
-+    if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name,
-+            type->name, type->name, scalar_type->name)))
-         return false;
-     func = hlsl_compile_internal_function(ctx, "refract", body);
-@@ -4332,7 +4589,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_round(struct hlsl_ctx *ctx,
-@@ -4415,6 +4672,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx,
-     return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc);
- }
-+static bool intrinsic_sincos(struct hlsl_ctx *ctx,
-+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_function_decl *func;
-+    struct hlsl_type *type;
-+    char *body;
-+    static const char template[] =
-+            "void sincos(%s f, out %s s, out %s c)\n"
-+            "{\n"
-+            "    s = sin(f);\n"
-+            "    c = cos(f);\n"
-+            "}";
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-+        return false;
-+    type = params->args[0]->data_type;
-+    if (!(body = hlsl_sprintf_alloc(ctx, template,
-+            type->name, type->name, type->name)))
-+        return false;
-+    func = hlsl_compile_internal_function(ctx, "sincos", body);
-+    vkd3d_free(body);
-+    if (!func)
-+        return false;
-+    return !!add_user_call(ctx, func, params, false, loc);
- static bool intrinsic_sinh(struct hlsl_ctx *ctx,
-         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-@@ -4436,9 +4722,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
-             "    return (p * p) * (3 - 2 * p);\n"
-             "}";
--    if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
-+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-         return false;
--    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-+    type = params->args[0]->data_type;
-     if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name)))
-         return false;
-@@ -4447,7 +4733,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_sqrt(struct hlsl_ctx *ctx,
-@@ -4469,13 +4755,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx,
-     if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
-         return false;
-+    type = params->args[0]->data_type;
-     if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL,
-             params->args[1], params->args[0], loc)))
-         return false;
--    type = ge->data_type;
--    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
-     return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc);
- }
-@@ -4523,7 +4808,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx,
-     if (!func)
-         return false;
--    return add_user_call(ctx, func, params, loc);
-+    return !!add_user_call(ctx, func, params, false, loc);
- }
- static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params,
-@@ -4661,17 +4946,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
-         if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc)))
-             return false;
--        initialize_var_components(ctx, params->instrs, var, &idx, coords);
-+        initialize_var_components(ctx, params->instrs, var, &idx, coords, false);
-         if (hlsl_version_ge(ctx, 4, 0))
-         {
-             if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc)))
-                 return false;
-             hlsl_block_add_instr(params->instrs, half);
--            initialize_var_components(ctx, params->instrs, var, &idx, half);
-+            initialize_var_components(ctx, params->instrs, var, &idx, half, false);
-         }
-         else
--            initialize_var_components(ctx, params->instrs, var, &idx, coords);
-+            initialize_var_components(ctx, params->instrs, var, &idx, coords, false);
-         if (!(load = hlsl_new_var_load(ctx, var, loc)))
-             return false;
-@@ -4890,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx,
-     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
-     struct hlsl_ir_node *expr;
-+    if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1))
-+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
-+                "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher.");
-     if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT,
-             operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
-         return false;
-@@ -4937,9 +5226,11 @@ intrinsic_functions[] =
-     {"determinant",                         1, true,  intrinsic_determinant},
-     {"distance",                            2, true,  intrinsic_distance},
-     {"dot",                                 2, true,  intrinsic_dot},
-+    {"dst",                                 2, true,  intrinsic_dst},
-     {"exp",                                 1, true,  intrinsic_exp},
-     {"exp2",                                1, true,  intrinsic_exp2},
-     {"f16tof32",                            1, true,  intrinsic_f16tof32},
-+    {"f32tof16",                            1, true,  intrinsic_f32tof16},
-     {"faceforward",                         3, true,  intrinsic_faceforward},
-     {"floor",                               1, true,  intrinsic_floor},
-     {"fmod",                                2, true,  intrinsic_fmod},
-@@ -4952,8 +5243,10 @@ intrinsic_functions[] =
-     {"log",                                 1, true,  intrinsic_log},
-     {"log10",                               1, true,  intrinsic_log10},
-     {"log2",                                1, true,  intrinsic_log2},
-+    {"mad",                                 3, true,  intrinsic_mad},
-     {"max",                                 2, true,  intrinsic_max},
-     {"min",                                 2, true,  intrinsic_min},
-+    {"modf",                                2, true,  intrinsic_modf},
-     {"mul",                                 2, true,  intrinsic_mul},
-     {"normalize",                           1, true,  intrinsic_normalize},
-     {"pow",                                 2, true,  intrinsic_pow},
-@@ -4966,6 +5259,7 @@ intrinsic_functions[] =
-     {"saturate",                            1, true,  intrinsic_saturate},
-     {"sign",                                1, true,  intrinsic_sign},
-     {"sin",                                 1, true,  intrinsic_sin},
-+    {"sincos",                              3, true,  intrinsic_sincos},
-     {"sinh",                                1, true,  intrinsic_sinh},
-     {"smoothstep",                          3, true,  intrinsic_smoothstep},
-     {"sqrt",                                1, true,  intrinsic_sqrt},
-@@ -5002,9 +5296,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name,
-     struct intrinsic_function *intrinsic;
-     struct hlsl_ir_function_decl *decl;
--    if ((decl = find_function_call(ctx, name, args, loc)))
-+    for (unsigned int i = 0; i < args->args_count; ++i)
-+    {
-+        if (args->args[i]->data_type->class == HLSL_CLASS_ERROR)
-+        {
-+            args->instrs->value = ctx->error_instr;
-+            return args->instrs;
-+        }
-+    }
-+    if ((decl = find_function_call(ctx, name, args, false, loc)))
-     {
--        if (!add_user_call(ctx, decl, args, loc))
-+        if (!add_user_call(ctx, decl, args, false, loc))
-             goto fail;
-     }
-     else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions),
-@@ -5060,18 +5363,94 @@ fail:
-     return NULL;
- }
-+static struct hlsl_block *add_shader_compilation(struct hlsl_ctx *ctx, const char *profile_name,
-+        const char *function_name, struct parse_initializer *args, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *compile, *call_to_compile = NULL;
-+    struct hlsl_ir_function_decl *decl;
-+    if (!ctx->in_state_block && ctx->cur_scope != ctx->globals)
-+    {
-+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE,
-+                "Shader compilation statements must be in global scope or a state block.");
-+        free_parse_initializer(args);
-+        return NULL;
-+    }
-+    if (!(decl = find_function_call(ctx, function_name, args, true, loc)))
-+    {
-+        if (rb_get(&ctx->functions, function_name))
-+        {
-+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
-+                    "No compatible \"%s\" declaration with %u uniform parameters found.",
-+                    function_name, args->args_count);
-+        }
-+        else
-+        {
-+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
-+                    "Function \"%s\" is not defined.", function_name);
-+        }
-+        free_parse_initializer(args);
-+        return NULL;
-+    }
-+    if (!(call_to_compile = add_user_call(ctx, decl, args, true, loc)))
-+    {
-+        free_parse_initializer(args);
-+        return NULL;
-+    }
-+    if (!(compile = hlsl_new_compile(ctx, HLSL_COMPILE_TYPE_COMPILE,
-+            profile_name, &call_to_compile, 1, args->instrs, loc)))
-+    {
-+        free_parse_initializer(args);
-+        return NULL;
-+    }
-+    free_parse_initializer(args);
-+    return make_block(ctx, compile);
-+static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type,
-+        struct parse_initializer *args, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *compile;
-+    switch (compile_type)
-+    {
-+            vkd3d_unreachable();
-+            if (args->args_count != 2 && args->args_count != 6)
-+            {
-+                hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                        "Wrong number of arguments to ConstructGSWithSO: expected 2 or 6, but got %u.",
-+                        args->args_count);
-+            }
-+            break;
-+    }
-+    if (!(compile = hlsl_new_compile(ctx, compile_type, NULL, args->args, args->args_count, args->instrs, loc)))
-+    {
-+        free_parse_initializer(args);
-+        return NULL;
-+    }
-+    free_parse_initializer(args);
-+    return make_block(ctx, compile);
- static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type,
-         struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-     struct hlsl_ir_load *load;
-     struct hlsl_ir_var *var;
--    unsigned int i, idx = 0;
-     if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc)))
-         return NULL;
--    for (i = 0; i < params->args_count; ++i)
--        initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]);
-+    initialize_var(ctx, var, params, false);
-     if (!(load = hlsl_new_var_load(ctx, var, loc)))
-         return NULL;
-@@ -5088,6 +5467,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
-     struct hlsl_type *cond_type = cond->data_type;
-     struct hlsl_type *common_type;
-+    if (cond->data_type->class == HLSL_CLASS_ERROR
-+            || first->data_type->class == HLSL_CLASS_ERROR
-+            || second->data_type->class == HLSL_CLASS_ERROR)
-+    {
-+        block->value = ctx->error_instr;
-+        return true;
-+    }
-     if (cond_type->class > HLSL_CLASS_LAST_NUMERIC)
-     {
-         struct vkd3d_string_buffer *string;
-@@ -5113,11 +5500,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
-         }
-         else
-         {
--            cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
--                    cond_type->dimx, cond_type->dimy);
--            if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
--                return false;
-             if (common_type->dimx == 1 && common_type->dimy == 1)
-             {
-                 common_type = hlsl_get_numeric_type(ctx, cond_type->class,
-@@ -5139,6 +5521,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
-                 hlsl_release_string_buffer(ctx, cond_string);
-                 hlsl_release_string_buffer(ctx, value_string);
-             }
-+            cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL,
-+                    common_type->dimx, common_type->dimy);
-+            if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
-+                return false;
-         }
-         if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc)))
-@@ -5196,6 +5583,7 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim)
-         case HLSL_SAMPLER_DIM_CUBE:
-             /* Offset parameters not supported for these types. */
-             return 0;
-         default:
-@@ -5215,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct
-     return false;
- }
-+static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
-+        const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
-+    struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD};
-+    struct hlsl_ir_node *load;
-+    unsigned int value_dim;
-+    if (params->args_count != 1 && params->args_count != 2)
-+    {
-+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.",
-+                params->args_count);
-+        return false;
-+    }
-+    if (params->args_count == 2)
-+    {
-+        hlsl_fixme(ctx, loc, "Tiled resource status argument.");
-+        return false;
-+    }
-+    if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR)
-+    {
-+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name);
-+        return false;
-+    }
-+    if (!strcmp(name, "Load"))
-+        value_dim = 1;
-+    else if (!strcmp(name, "Load2"))
-+        value_dim = 2;
-+    else if (!strcmp(name, "Load3"))
-+        value_dim = 3;
-+    else
-+        value_dim = 4;
-+    if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0],
-+            hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
-+        return false;
-+    load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim);
-+    load_params.resource = object;
-+    if (!(load = hlsl_new_resource_load(ctx, &load_params, loc)))
-+        return false;
-+    hlsl_block_add_instr(block, load);
-+    return true;
- static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
-         const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-@@ -5224,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block,
-     struct hlsl_ir_node *load;
-     bool multisampled;
-+    if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-+        return add_raw_load_method_call(ctx, block, object, name, params, loc);
-     if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
-     {
-         hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name);
-@@ -5813,32 +6253,88 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block
-     return true;
- }
-+static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
-+        const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
-+    struct hlsl_ir_node *offset, *rhs, *store;
-+    struct hlsl_deref resource_deref;
-+    unsigned int value_dim;
-+    if (params->args_count != 2)
-+    {
-+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Wrong number of arguments to method '%s': expected 2.", name);
-+        return false;
-+    }
-+    if (!strcmp(name, "Store"))
-+        value_dim = 1;
-+    else if (!strcmp(name, "Store2"))
-+        value_dim = 2;
-+    else if (!strcmp(name, "Store3"))
-+        value_dim = 3;
-+    else
-+        value_dim = 4;
-+    if (!(offset = add_implicit_conversion(ctx, block, params->args[0],
-+            hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
-+        return false;
-+    if (!(rhs = add_implicit_conversion(ctx, block, params->args[1],
-+            hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc)))
-+        return false;
-+    if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object))
-+        return false;
-+    if (!(store = hlsl_new_resource_store(ctx, &resource_deref, offset, rhs, loc)))
-+    {
-+        hlsl_cleanup_deref(&resource_deref);
-+        return false;
-+    }
-+    hlsl_block_add_instr(block, store);
-+    hlsl_cleanup_deref(&resource_deref);
-+    return true;
- static const struct method_function
- {
-     const char *name;
-     bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
-             const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc);
--    bool valid_dims[HLSL_SAMPLER_DIM_MAX + 1];
-+    char valid_dims[HLSL_SAMPLER_DIM_MAX + 1];
- }
--object_methods[] =
-+texture_methods[] =
- {
--                                                        /*  g c   1d  2d  3d  cube  1darr  2darr  2dms  2dmsarr  cubearr  buff  sbuff*/
--    { "Gather",             add_gather_method_call,        {0,0,  0,  1,  0,  1,    0,     1,     0,    0,       1,       0,    0}},
--    { "GatherAlpha",        add_gather_method_call,        {0,0,  0,  1,  0,  1,    0,     1,     0,    0,       1,       0,    0}},
--    { "GatherBlue",         add_gather_method_call,        {0,0,  0,  1,  0,  1,    0,     1,     0,    0,       1,       0,    0}},
--    { "GatherGreen",        add_gather_method_call,        {0,0,  0,  1,  0,  1,    0,     1,     0,    0,       1,       0,    0}},
--    { "GatherRed",          add_gather_method_call,        {0,0,  0,  1,  0,  1,    0,     1,     0,    0,       1,       0,    0}},
-+    { "Gather",             add_gather_method_call,        "00010101001000" },
-+    { "GatherAlpha",        add_gather_method_call,        "00010101001000" },
-+    { "GatherBlue",         add_gather_method_call,        "00010101001000" },
-+    { "GatherGreen",        add_gather_method_call,        "00010101001000" },
-+    { "GatherRed",          add_gather_method_call,        "00010101001000" },
-+    { "GetDimensions",      add_getdimensions_method_call, "00111111111110" },
--    { "GetDimensions",      add_getdimensions_method_call, {0,0,  1,  1,  1,  1,    1,     1,     1,    1,       1,       1,    1}},
-+    { "Load",               add_load_method_call,          "00111011110111" },
-+    { "Load2",              add_raw_load_method_call,      "00000000000001" },
-+    { "Load3",              add_raw_load_method_call,      "00000000000001" },
-+    { "Load4",              add_raw_load_method_call,      "00000000000001" },
--    { "Load",               add_load_method_call,          {0,0,  1,  1,  1,  0,    1,     1,     1,    1,       0,       1,    1}},
-+    { "Sample",             add_sample_method_call,        "00111111001000" },
-+    { "SampleBias",         add_sample_lod_method_call,    "00111111001000" },
-+    { "SampleCmp",          add_sample_cmp_method_call,    "00111111001000" },
-+    { "SampleCmpLevelZero", add_sample_cmp_method_call,    "00111111001000" },
-+    { "SampleGrad",         add_sample_grad_method_call,   "00111111001000" },
-+    { "SampleLevel",        add_sample_lod_method_call,    "00111111001000" },
--    { "Sample",             add_sample_method_call,        {0,0,  1,  1,  1,  1,    1,     1,     0,    0,       1,       0,    0}},
--    { "SampleBias",         add_sample_lod_method_call,    {0,0,  1,  1,  1,  1,    1,     1,     0,    0,       1,       0,    0}},
--    { "SampleCmp",          add_sample_cmp_method_call,    {0,0,  1,  1,  1,  1,    1,     1,     0,    0,       1,       0,    0}},
--    { "SampleCmpLevelZero", add_sample_cmp_method_call,    {0,0,  1,  1,  1,  1,    1,     1,     0,    0,       1,       0,    0}},
--    { "SampleGrad",         add_sample_grad_method_call,   {0,0,  1,  1,  1,  1,    1,     1,     0,    0,       1,       0,    0}},
--    { "SampleLevel",        add_sample_lod_method_call,    {0,0,  1,  1,  1,  1,    1,     1,     0,    0,       1,       0,    0}},
-+static const struct method_function uav_methods[] =
-+    { "Store",  add_store_method_call, "00000000000001" },
-+    { "Store2", add_store_method_call, "00000000000001" },
-+    { "Store3", add_store_method_call, "00000000000001" },
-+    { "Store4", add_store_method_call, "00000000000001" },
- };
- static int object_method_function_name_compare(const void *a, const void *b)
-@@ -5852,9 +6348,35 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru
-         const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
- {
-     const struct hlsl_type *object_type = object->data_type;
--    const struct method_function *method;
-+    const struct method_function *method, *methods;
-+    unsigned int count;
-+    if (object_type->class == HLSL_CLASS_ERROR)
-+    {
-+        block->value = ctx->error_instr;
-+        return true;
-+    }
-+    for (unsigned int i = 0; i < params->args_count; ++i)
-+    {
-+        if (params->args[i]->data_type->class == HLSL_CLASS_ERROR)
-+        {
-+            block->value = ctx->error_instr;
-+            return true;
-+        }
-+    }
--    if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
-+    if (object_type->class == HLSL_CLASS_TEXTURE)
-+    {
-+        count = ARRAY_SIZE(texture_methods);
-+        methods = texture_methods;
-+    }
-+    else if (object_type->class == HLSL_CLASS_UAV)
-+    {
-+        count = ARRAY_SIZE(uav_methods);
-+        methods = uav_methods;
-+    }
-+    else
-     {
-         struct vkd3d_string_buffer *string;
-@@ -5865,10 +6387,10 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru
-         return false;
-     }
--    method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), sizeof(*method),
-+    method = bsearch(name, methods, count, sizeof(*method),
-             object_method_function_name_compare);
--    if (method && method->valid_dims[object_type->sampler_dim])
-+    if (method && method->valid_dims[object_type->sampler_dim] == '1')
-     {
-         return method->handler(ctx, block, object, name, params, loc);
-     }
-@@ -5995,16 +6517,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
-     hlsl_release_string_buffer(ctx, string);
- }
--static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry)
--    if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1,
--            sizeof(*state_block->entries)))
--        return false;
--    state_block->entries[state_block->count++] = entry;
--    return true;
- }
- %locations
-@@ -6037,10 +6549,11 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
-     struct parse_if_body if_body;
-     enum parse_assign_op assign_op;
-     struct hlsl_reg_reservation reg_reservation;
--    struct parse_colon_attribute colon_attribute;
-+    struct parse_colon_attributes colon_attributes;
-     struct hlsl_semantic semantic;
-     enum hlsl_buffer_type buffer_type;
-     enum hlsl_sampler_dim sampler_dim;
-+    enum hlsl_so_object_type so_type;
-     struct hlsl_attribute *attr;
-     struct parse_attribute_list attr_list;
-     struct hlsl_ir_switch_case *switch_case;
-@@ -6052,14 +6565,17 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_BREAK
- %token KW_BUFFER
- %token KW_CASE
- %token KW_CBUFFER
- %token KW_CENTROID
- %token KW_COMPILE
- %token KW_CONST
- %token KW_CONTINUE
- %token KW_DEFAULT
-@@ -6067,7 +6583,6 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_DISCARD
- %token KW_DO
--%token KW_DOUBLE
- %token KW_ELSE
- %token KW_EXPORT
- %token KW_EXTERN
-@@ -6082,6 +6597,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_INLINE
- %token KW_INOUT
- %token KW_LINEAR
- %token KW_MATRIX
-@@ -6091,6 +6607,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_PASS
-@@ -6104,6 +6621,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_REGISTER
- %token KW_ROW_MAJOR
- %token KW_RWBUFFER
-@@ -6118,6 +6636,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_SHARED
-+%token KW_SNORM
- %token KW_STATIC
-@@ -6138,10 +6657,12 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %token KW_TEXTURE3D
- %token KW_TRUE
- %token KW_TYPEDEF
- %token KW_UNSIGNED
- %token KW_UNIFORM
-+%token KW_UNORM
- %token KW_VECTOR
- %token KW_VOID
-@@ -6230,7 +6751,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %type <buffer_type> buffer_type
--%type <colon_attribute> colon_attribute
-+%type <colon_attributes> colon_attributes
- %type <fields> field
- %type <fields> fields_list
-@@ -6267,12 +6788,15 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %type <semantic> semantic
-+%type <so_type> so_type
- %type <state_block> state_block
- %type <state_block_index> state_block_index_opt
- %type <switch_case> switch_case
-+%type <type> base_optional
- %type <type> field_type
- %type <type> named_struct_spec
- %type <type> unnamed_struct_spec
-@@ -6280,6 +6804,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h
- %type <type> type
- %type <type> type_no_void
- %type <type> typedef_type
-+%type <type> resource_format
- %type <variable_def> state_block_list
- %type <variable_def> type_spec
-@@ -6416,7 +6941,7 @@ effect_group:
-         }
- buffer_declaration:
--      var_modifiers buffer_type any_identifier colon_attribute annotations_opt
-+      var_modifiers buffer_type any_identifier colon_attributes annotations_opt
-         {
-             if ($4.semantic.name)
-                 hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers.");
-@@ -6487,11 +7012,28 @@ struct_spec:
-     | unnamed_struct_spec
- named_struct_spec:
--      KW_STRUCT any_identifier '{' fields_list '}'
-+      KW_STRUCT any_identifier base_optional '{' fields_list '}'
-         {
-             bool ret;
--            $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count);
-+            if ($3)
-+            {
-+                char *name;
-+                if (!(name = hlsl_strdup(ctx, "$super")))
-+                    YYABORT;
-+                if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields)))
-+                    YYABORT;
-+                memmove(&$5.fields[1], $5.fields, $5.count * sizeof(*$5.fields));
-+                ++$5.count;
-+                memset(&$5.fields[0], 0, sizeof($5.fields[0]));
-+                $5.fields[0].type = $3;
-+                $5.fields[0].loc = @3;
-+                $5.fields[0].name = name;
-+            }
-+            $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count);
-             if (hlsl_get_var(ctx->cur_scope, $2))
-             {
-@@ -6518,6 +7060,23 @@ any_identifier:
-+/* TODO: Multiple inheritance support for interfaces. */
-+      %empty
-+        {
-+            $$ = NULL;
-+        }
-+        {
-+            $$ = hlsl_get_type(ctx->cur_scope, $2, true, true);
-+            if ($$->class != HLSL_CLASS_STRUCT)
-+            {
-+                hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2);
-+                YYABORT;
-+            }
-+            vkd3d_free($2);
-+        }
- fields_list:
-       %empty
-         {
-@@ -6707,7 +7266,7 @@ func_declaration:
- func_prototype_no_attrs:
-     /* var_modifiers is necessary to avoid shift/reduce conflicts. */
--      var_modifiers type var_identifier '(' parameters ')' colon_attribute
-+      var_modifiers type var_identifier '(' parameters ')' colon_attributes
-         {
-             uint32_t modifiers = $1;
-             struct hlsl_ir_var *var;
-@@ -6827,6 +7386,8 @@ func_prototype:
-       func_prototype_no_attrs
-     | attribute_list func_prototype_no_attrs
-         {
-+            check_attribute_list_for_duplicates(ctx, &$1);
-             if ($2.first)
-             {
-                 $2.decl->attr_count = $1.count;
-@@ -6882,28 +7443,39 @@ var_identifier:
-       %empty
-         {
-             $$.semantic = (struct hlsl_semantic){0};
-             $$.reg_reservation.reg_type = 0;
-             $$.reg_reservation.offset_type = 0;
-         }
--    | semantic
-+    | colon_attributes semantic
-         {
--            $$.semantic = $1;
--            $$.reg_reservation.reg_type = 0;
--            $$.reg_reservation.offset_type = 0;
-+            hlsl_cleanup_semantic(&$$.semantic);
-+            $$.semantic = $2;
-         }
--    | register_reservation
-+    | colon_attributes register_reservation
-         {
--            $$.semantic = (struct hlsl_semantic){0};
--            $$.reg_reservation = $1;
-+            if ($$.reg_reservation.reg_type)
-+                hlsl_fixme(ctx, &@2, "Multiple register() reservations.");
-+            $$.reg_reservation.reg_type = $2.reg_type;
-+            $$.reg_reservation.reg_index = $2.reg_index;
-+            $$.reg_reservation.reg_space = $2.reg_space;
-         }
--    | packoffset_reservation
-+    | colon_attributes packoffset_reservation
-         {
--            $$.semantic = (struct hlsl_semantic){0};
--            $$.reg_reservation = $1;
-+            if (ctx->cur_buffer == ctx->globals_buffer)
-+            {
-+                hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
-+                        "The packoffset() reservation is only allowed within 'cbuffer' blocks.");
-+            }
-+            else
-+            {
-+                $$.reg_reservation.offset_type = $2.offset_type;
-+                $$.reg_reservation.offset_index = $2.offset_index;
-+            }
-         }
- semantic:
-@@ -7099,7 +7671,7 @@ parameter:
-         }
- parameter_decl:
--      var_modifiers type_no_void any_identifier arrays colon_attribute
-+      var_modifiers type_no_void any_identifier arrays colon_attributes
-         {
-             uint32_t modifiers = $1;
-             struct hlsl_type *type;
-@@ -7239,6 +7811,29 @@ rov_type:
-             $$ = HLSL_SAMPLER_DIM_3D;
-         }
-+        {
-+        }
-+        {
-+        }
-+        {
-+        }
-+      var_modifiers type
-+        {
-+            uint32_t modifiers = $1;
-+            if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, false, &@1)))
-+                YYABORT;
-+        }
- type_no_void:
-       KW_VECTOR '<' type ',' C_INTEGER '>'
-         {
-@@ -7332,18 +7927,18 @@ type_no_void:
-         {
-             $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0);
-         }
--    | texture_type '<' type '>'
-+    | texture_type '<' resource_format '>'
-         {
-             validate_texture_format_type(ctx, $3, &@3);
-             $$ = hlsl_new_texture_type(ctx, $1, $3, 0);
-         }
--    | texture_ms_type '<' type '>'
-+    | texture_ms_type '<' resource_format '>'
-         {
-             validate_texture_format_type(ctx, $3, &@3);
-             $$ = hlsl_new_texture_type(ctx, $1, $3, 0);
-         }
--    | texture_ms_type '<' type ',' shift_expr '>'
-+    | texture_ms_type '<' resource_format ',' shift_expr '>'
-         {
-             unsigned int sample_count;
-             struct hlsl_block block;
-@@ -7359,16 +7954,28 @@ type_no_void:
-             $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count);
-         }
--    | uav_type '<' type '>'
-+        {
-+            $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0);
-+        }
-+    | uav_type '<' resource_format '>'
-         {
-             validate_uav_type(ctx, $1, $3, &@3);
-             $$ = hlsl_new_uav_type(ctx, $1, $3, false);
-         }
--    | rov_type '<' type '>'
-+    | rov_type '<' resource_format '>'
-         {
--            validate_uav_type(ctx, $1, $3, &@3);
-+            validate_uav_type(ctx, $1, $3, &@4);
-             $$ = hlsl_new_uav_type(ctx, $1, $3, true);
-         }
-+    | so_type '<' type '>'
-+        {
-+            $$ = hlsl_new_stream_output_type(ctx, $1, $3);
-+        }
-+        {
-+            $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false);
-+        }
-     | KW_STRING
-         {
-             $$ = ctx->builtin_types.string;
-@@ -7587,7 +8194,7 @@ variables_def_typed:
-         }
- variable_decl:
--      any_identifier arrays colon_attribute annotations_opt
-+      any_identifier arrays colon_attributes annotations_opt
-         {
-             $$ = hlsl_alloc(ctx, sizeof(*$$));
-             $$->loc = @1;
-@@ -7614,11 +8221,21 @@ stateblock_lhs_identifier:
-             if (!($$ = hlsl_strdup(ctx, "pixelshader")))
-                 YYABORT;
-         }
-+    | KW_TEXTURE
-+        {
-+            if (!($$ = hlsl_strdup(ctx, "texture")))
-+                YYABORT;
-+        }
-         {
-             if (!($$ = hlsl_strdup(ctx, "vertexshader")))
-                 YYABORT;
-         }
-+        {
-+            if (!($$ = hlsl_strdup(ctx, "geometryshader")))
-+                YYABORT;
-+        }
- state_block_index_opt:
-       %empty
-@@ -7666,7 +8283,7 @@ state_block:
-             vkd3d_free($5.args);
-             $$ = $1;
--            state_block_add_entry($$, entry);
-+            hlsl_state_block_add_entry($$, entry);
-         }
-     | state_block any_identifier '(' func_arguments ')' ';'
-         {
-@@ -7694,7 +8311,7 @@ state_block:
-             hlsl_validate_state_block_entry(ctx, entry, &@4);
-             $$ = $1;
--            state_block_add_entry($$, entry);
-+            hlsl_state_block_add_entry($$, entry);
-         }
- state_block_list:
-@@ -7906,6 +8523,14 @@ var_modifiers:
-         {
-             $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1);
-         }
-+    | KW_UNORM var_modifiers
-+        {
-+            $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_UNORM, &@1);
-+        }
-+    | KW_SNORM var_modifiers
-+        {
-+            $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, &@1);
-+        }
-     | var_identifier var_modifiers
-         {
-             $$ = $2;
-@@ -7931,6 +8556,7 @@ complex_initializer:
-             $$.args[0] = node_from_block($1);
-             $$.instrs = $1;
-             $$.braces = false;
-+            $$.loc = @$;
-         }
-     | '{' complex_initializer_list '}'
-         {
-@@ -7962,6 +8588,7 @@ complex_initializer_list:
-                 $$.args[$$.args_count++] = $3.args[i];
-             hlsl_block_add_block($$.instrs, $3.instrs);
-             free_parse_initializer(&$3);
-+            $$.loc = @$;
-         }
- initializer_expr:
-@@ -7979,6 +8606,7 @@ initializer_expr_list:
-             $$.args[0] = node_from_block($1);
-             $$.instrs = $1;
-             $$.braces = false;
-+            $$.loc = @$;
-         }
-     | initializer_expr_list ',' initializer_expr
-         {
-@@ -8092,8 +8720,7 @@ selection_statement:
-             struct hlsl_ir_node *instr;
-             unsigned int i;
--            if (attribute_list_has_duplicates(attributes))
--                hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute.");
-+            check_attribute_list_for_duplicates(ctx, attributes);
-             for (i = 0; i < attributes->count; ++i)
-             {
-@@ -8298,6 +8925,7 @@ func_arguments:
-             if (!($$.instrs = make_empty_block(ctx)))
-                 YYABORT;
-             $$.braces = false;
-+            $$.loc = @$;
-         }
-     | initializer_expr_list
-@@ -8391,6 +9019,34 @@ primary_expr:
-         {
-             $$ = $2;
-         }
-+    | KW_COMPILE any_identifier var_identifier '(' func_arguments ')'
-+        {
-+            if (!($$ = add_shader_compilation(ctx, $2, $3, &$5, &@1)))
-+            {
-+                vkd3d_free($2);
-+                vkd3d_free($3);
-+                YYABORT;
-+            }
-+            vkd3d_free($2);
-+            vkd3d_free($3);
-+        }
-+    | KW_COMPILESHADER '(' any_identifier ',' var_identifier '(' func_arguments ')' ')'
-+        {
-+            if (!($$ = add_shader_compilation(ctx, $3, $5, &$7, &@1)))
-+            {
-+                vkd3d_free($3);
-+                vkd3d_free($5);
-+                YYABORT;
-+            }
-+            vkd3d_free($3);
-+            vkd3d_free($5);
-+        }
-+    | KW_CONSTRUCTGSWITHSO '(' func_arguments ')'
-+        {
-+            if (!($$ = add_compile_variant(ctx, HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, &$3, &@1)))
-+                YYABORT;
-+        }
-     | var_identifier '(' func_arguments ')'
-         {
-             if (!($$ = add_call(ctx, $1, &$3, &@1)))
-@@ -8400,6 +9056,25 @@ primary_expr:
-             }
-             vkd3d_free($1);
-         }
-+    | KW_SAMPLER_STATE '{' state_block_start state_block '}'
-+        {
-+            struct hlsl_ir_node *sampler_state;
-+            ctx->in_state_block = 0;
-+            if (!ctx->in_state_block && ctx->cur_scope != ctx->globals)
-+                hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE,
-+                        "sampler_state must be in global scope or a state block.");
-+            if (!(sampler_state = hlsl_new_sampler_state(ctx, $4, &@1)))
-+            {
-+                hlsl_free_state_block($4);
-+                YYABORT;
-+            }
-+            hlsl_free_state_block($4);
-+            if (!($$ = make_block(ctx, sampler_state)))
-+                YYABORT;
-+        }
-         {
-             if (ctx->in_state_block)
-@@ -8416,7 +9091,11 @@ primary_expr:
-             else
-             {
-                 hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Identifier \"%s\" is not declared.", $1);
--                YYABORT;
-+                vkd3d_free($1);
-+                if (!($$ = make_empty_block(ctx)))
-+                    YYABORT;
-+                $$->value = ctx->error_instr;
-             }
-         }
-@@ -8446,46 +9125,34 @@ postfix_expr:
-             if (node->data_type->class == HLSL_CLASS_STRUCT)
-             {
--                struct hlsl_type *type = node->data_type;
--                const struct hlsl_struct_field *field;
--                unsigned int field_idx = 0;
--                if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3)))
-+                if (!add_record_access_recurse(ctx, $1, $3, &@2))
-                 {
--                    hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3);
-+                    destroy_block($1);
-                     vkd3d_free($3);
-                     YYABORT;
-                 }
--                field_idx = field - type->e.record.fields;
--                if (!add_record_access(ctx, $1, node, field_idx, &@2))
--                {
--                    vkd3d_free($3);
--                    YYABORT;
--                }
--                vkd3d_free($3);
--                $$ = $1;
-             }
-             else if (hlsl_is_numeric_type(node->data_type))
-             {
-                 struct hlsl_ir_node *swizzle;
--                if (!(swizzle = get_swizzle(ctx, node, $3, &@3)))
-+                if ((swizzle = get_swizzle(ctx, node, $3, &@3)))
-+                {
-+                    hlsl_block_add_instr($1, swizzle);
-+                }
-+                else
-                 {
-                     hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3);
--                    vkd3d_free($3);
--                    YYABORT;
-+                    $1->value = ctx->error_instr;
-                 }
--                hlsl_block_add_instr($1, swizzle);
--                vkd3d_free($3);
--                $$ = $1;
-             }
--            else
-+            else if (node->data_type->class != HLSL_CLASS_ERROR)
-             {
-                 hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3);
--                vkd3d_free($3);
--                YYABORT;
-+                $1->value = ctx->error_instr;
-             }
-+            vkd3d_free($3);
-+            $$ = $1;
-         }
-     | postfix_expr '[' expr ']'
-         {
-@@ -8523,14 +9190,6 @@ postfix_expr:
-                 free_parse_initializer(&$4);
-                 YYABORT;
-             }
--            if ($2->dimx * $2->dimy != initializer_size(&$4))
--            {
--                hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
--                        "Expected %u components in constructor, but got %u.",
--                        $2->dimx * $2->dimy, initializer_size(&$4));
--                free_parse_initializer(&$4);
--                YYABORT;
--            }
-             if (!($$ = add_constructor(ctx, $2, &$4, &@2)))
-             {
-@@ -8597,10 +9256,6 @@ unary_expr:
-     /* var_modifiers is necessary to avoid shift/reduce conflicts. */
-     | '(' var_modifiers type arrays ')' unary_expr
-         {
--            struct hlsl_type *src_type = node_from_block($6)->data_type;
--            struct hlsl_type *dst_type;
--            unsigned int i;
-             if ($2)
-             {
-                 hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
-@@ -8608,36 +9263,13 @@ unary_expr:
-                 YYABORT;
-             }
--            dst_type = $3;
--            for (i = 0; i < $4.count; ++i)
--            {
--                if ($4.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT)
--                {
--                    hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
--                            "Implicit size arrays not allowed in casts.");
--                }
--                dst_type = hlsl_new_array_type(ctx, dst_type, $4.sizes[i]);
--            }
--            if (!explicit_compatible_data_types(ctx, src_type, dst_type))
--            {
--                struct vkd3d_string_buffer *src_string, *dst_string;
--                src_string = hlsl_type_to_string(ctx, src_type);
--                dst_string = hlsl_type_to_string(ctx, dst_type);
--                if (src_string && dst_string)
--                    hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.",
--                            src_string->buffer, dst_string->buffer);
--                hlsl_release_string_buffer(ctx, src_string);
--                hlsl_release_string_buffer(ctx, dst_string);
--                YYABORT;
--            }
--            if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3))
-+            if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3))
-             {
-                 destroy_block($6);
-+                vkd3d_free($4.sizes);
-                 YYABORT;
-             }
-+            vkd3d_free($4.sizes);
-             $$ = $6;
-         }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
-index 92b5c71c43f..1fbf670f032 100644
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
-@@ -19,6 +19,7 @@
-  */
- #include "hlsl.h"
-+#include "vkd3d_shader_private.h"
- #include <stdio.h>
- #include <math.h>
-@@ -276,9 +277,9 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls
-             == base_type_get_semantic_equivalent(type2->e.numeric.type);
- }
--static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
--        struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic,
--        uint32_t index, bool output, const struct vkd3d_shader_location *loc)
-+static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
-+        struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic,
-+        uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc)
- {
-     struct hlsl_semantic new_semantic;
-     struct hlsl_ir_var *ext_var;
-@@ -287,7 +288,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
-     if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index)))
-         return NULL;
--    LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+    LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
-     {
-         if (!ascii_strcasecmp(ext_var->name, new_name))
-         {
-@@ -338,14 +339,32 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
-     else
-         ext_var->is_input_semantic = 1;
-     ext_var->is_param = var->is_param;
-+    ext_var->force_align = force_align;
-     list_add_before(&var->scope_entry, &ext_var->scope_entry);
--    list_add_tail(&ctx->extern_vars, &ext_var->extern_entry);
-+    list_add_tail(&func->extern_vars, &ext_var->extern_entry);
-     return ext_var;
- }
--static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs,
--        uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index)
-+static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t field_modifiers)
-+    field_modifiers |= modifiers;
-+    /* TODO: 'sample' modifier is not supported yet. */
-+    /* 'nointerpolation' always takes precedence, next the same is done for
-+     * 'sample', remaining modifiers are combined. */
-+    if (field_modifiers & HLSL_STORAGE_NOINTERPOLATION)
-+    {
-+        field_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK;
-+        field_modifiers |= HLSL_STORAGE_NOINTERPOLATION;
-+    }
-+    return field_modifiers;
-+static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs,
-+        uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
- {
-     struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst;
-     struct vkd3d_shader_location *loc = &lhs->node.loc;
-@@ -369,14 +388,17 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s
-     if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
-         vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4);
-+    if (hlsl_type_major_size(type) > 1)
-+        force_align = true;
-     for (i = 0; i < hlsl_type_major_size(type); ++i)
-     {
-         struct hlsl_ir_node *store, *cast;
-         struct hlsl_ir_var *input;
-         struct hlsl_ir_load *load;
--        if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic,
--                semantic_index + i, false, loc)))
-+        if (!(input = add_semantic_var(ctx, func, var, vector_type_src,
-+                modifiers, semantic, semantic_index + i, false, force_align, loc)))
-             return;
-         if (!(load = hlsl_new_var_load(ctx, input, &var->loc)))
-@@ -408,8 +430,9 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s
-     }
- }
--static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs,
--        uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index)
-+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx,
-+        struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers,
-+        struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
- {
-     struct vkd3d_shader_location *loc = &lhs->node.loc;
-     struct hlsl_type *type = lhs->node.data_type;
-@@ -425,12 +448,14 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *
-         for (i = 0; i < hlsl_type_element_count(type); ++i)
-         {
--            uint32_t element_modifiers = modifiers;
-+            uint32_t element_modifiers;
-             if (type->class == HLSL_CLASS_ARRAY)
-             {
-                 elem_semantic_index = semantic_index
-                         + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
-+                element_modifiers = modifiers;
-+                force_align = true;
-             }
-             else
-             {
-@@ -444,17 +469,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *
-                 semantic = &field->semantic;
-                 elem_semantic_index = semantic->index;
-                 loc = &field->loc;
--                element_modifiers |= field->storage_modifiers;
--                /* TODO: 'sample' modifier is not supported yet */
--                /* 'nointerpolation' always takes precedence, next the same is done for 'sample',
--                   remaining modifiers are combined. */
--                if (element_modifiers & HLSL_STORAGE_NOINTERPOLATION)
--                {
--                    element_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK;
--                    element_modifiers |= HLSL_STORAGE_NOINTERPOLATION;
--                }
-+                element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
-+                force_align = (i == 0);
-             }
-             if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc)))
-@@ -466,31 +482,33 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *
-                 return;
-             list_add_after(&c->entry, &element_load->node.entry);
--            prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index);
-+            prepend_input_copy_recurse(ctx, func, element_load, element_modifiers,
-+                    semantic, elem_semantic_index, force_align);
-         }
-     }
-     else
-     {
--        prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index);
-+        prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align);
-     }
- }
- /* Split inputs into two variables representing the semantic and temp registers,
-  * and copy the former to the latter, so that writes to input variables work. */
--static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var)
-+static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var)
- {
-     struct hlsl_ir_load *load;
-     /* This redundant load is expected to be deleted later by DCE. */
-     if (!(load = hlsl_new_var_load(ctx, var, &var->loc)))
-         return;
--    list_add_head(&block->instrs, &load->node.entry);
-+    list_add_head(&func->body.instrs, &load->node.entry);
--    prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index);
-+    prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false);
- }
--static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs,
--        uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index)
-+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
-+        struct hlsl_ir_load *rhs, uint32_t modifiers,
-+        struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
- {
-     struct hlsl_type *type = rhs->node.data_type, *vector_type;
-     struct vkd3d_shader_location *loc = &rhs->node.loc;
-@@ -511,24 +529,28 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s
-     vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type));
-+    if (hlsl_type_major_size(type) > 1)
-+        force_align = true;
-     for (i = 0; i < hlsl_type_major_size(type); ++i)
-     {
-         struct hlsl_ir_node *store;
-         struct hlsl_ir_var *output;
-         struct hlsl_ir_load *load;
--        if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc)))
-+        if (!(output = add_semantic_var(ctx, func, var, vector_type,
-+                modifiers, semantic, semantic_index + i, true, force_align, loc)))
-             return;
-         if (type->class == HLSL_CLASS_MATRIX)
-         {
-             if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc)))
-                 return;
--            hlsl_block_add_instr(block, c);
-+            hlsl_block_add_instr(&func->body, c);
-             if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc)))
-                 return;
--            hlsl_block_add_instr(block, &load->node);
-+            hlsl_block_add_instr(&func->body, &load->node);
-         }
-         else
-         {
-@@ -536,17 +558,18 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s
-             if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc)))
-                 return;
--            hlsl_block_add_instr(block, &load->node);
-+            hlsl_block_add_instr(&func->body, &load->node);
-         }
-         if (!(store = hlsl_new_simple_store(ctx, output, &load->node)))
-             return;
--        hlsl_block_add_instr(block, store);
-+        hlsl_block_add_instr(&func->body, store);
-     }
- }
--static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs,
--        uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index)
-+static void append_output_copy_recurse(struct hlsl_ctx *ctx,
-+        struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers,
-+        struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
- {
-     struct vkd3d_shader_location *loc = &rhs->node.loc;
-     struct hlsl_type *type = rhs->node.data_type;
-@@ -562,10 +585,14 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *
-         for (i = 0; i < hlsl_type_element_count(type); ++i)
-         {
-+            uint32_t element_modifiers;
-             if (type->class == HLSL_CLASS_ARRAY)
-             {
-                 elem_semantic_index = semantic_index
-                         + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
-+                element_modifiers = modifiers;
-+                force_align = true;
-             }
-             else
-             {
-@@ -576,38 +603,41 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *
-                 semantic = &field->semantic;
-                 elem_semantic_index = semantic->index;
-                 loc = &field->loc;
-+                element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
-+                force_align = (i == 0);
-             }
-             if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc)))
-                 return;
--            hlsl_block_add_instr(block, c);
-+            hlsl_block_add_instr(&func->body, c);
-             if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc)))
-                 return;
--            hlsl_block_add_instr(block, &element_load->node);
-+            hlsl_block_add_instr(&func->body, &element_load->node);
--            append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index);
-+            append_output_copy_recurse(ctx, func, element_load, element_modifiers,
-+                    semantic, elem_semantic_index, force_align);
-         }
-     }
-     else
-     {
--        append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index);
-+        append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align);
-     }
- }
- /* Split outputs into two variables representing the temp and semantic
-  * registers, and copy the former to the latter, so that reads from output
-  * variables work. */
--static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var)
-+static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var)
- {
-     struct hlsl_ir_load *load;
-     /* This redundant load is expected to be deleted later by DCE. */
-     if (!(load = hlsl_new_var_load(ctx, var, &var->loc)))
-         return;
--    hlsl_block_add_instr(block, &load->node);
-+    hlsl_block_add_instr(&func->body, &load->node);
--    append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index);
-+    append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false);
- }
- bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
-@@ -1649,17 +1679,23 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx,
-         case HLSL_CLASS_BLEND_STATE:
-         case HLSL_CLASS_NULL:
-             break;
-         case HLSL_CLASS_MATRIX:
-         case HLSL_CLASS_ARRAY:
-         case HLSL_CLASS_STRUCT:
--            /* FIXME: Actually we shouldn't even get here, but we don't split
--             * matrices yet. */
-+            /* We can't handle complex types here.
-+             * They should have been already split anyway by earlier passes,
-+             * but they may not have been deleted yet. We can't rely on DCE to
-+             * solve that problem for us, since we may be called on a partial
-+             * block, but DCE deletes dead stores, so it needs to be able to
-+             * see the whole program. */
-+        case HLSL_CLASS_ERROR:
-             return false;
-         case HLSL_CLASS_PASS:
-         case HLSL_CLASS_TECHNIQUE:
-@@ -4045,17 +4081,57 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
-     return true;
- }
-+static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
-+    struct hlsl_ir_node *cond, *cond_cast, *abs, *neg;
-+    struct hlsl_type *float_type;
-+    struct hlsl_ir_jump *jump;
-+    struct hlsl_block block;
-+    if (instr->type != HLSL_IR_JUMP)
-+        return false;
-+    jump = hlsl_ir_jump(instr);
-+    if (jump->type != HLSL_IR_JUMP_DISCARD_NZ)
-+        return false;
-+    cond = jump->condition.node;
-+    float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx);
-+    hlsl_block_init(&block);
-+    if (!(cond_cast = hlsl_new_cast(ctx, cond, float_type, &instr->loc)))
-+        return false;
-+    hlsl_block_add_instr(&block, cond_cast);
-+    if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond_cast, &instr->loc)))
-+        return false;
-+    hlsl_block_add_instr(&block, abs);
-+    if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc)))
-+        return false;
-+    hlsl_block_add_instr(&block, neg);
-+    list_move_tail(&instr->entry, &block.instrs);
-+    hlsl_src_remove(&jump->condition);
-+    hlsl_src_from_node(&jump->condition, neg);
-+    jump->type = HLSL_IR_JUMP_DISCARD_NEG;
-+    return true;
- static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
- {
-     switch (instr->type)
-     {
-         case HLSL_IR_CONSTANT:
-+        case HLSL_IR_COMPILE:
-         case HLSL_IR_EXPR:
-         case HLSL_IR_INDEX:
-         case HLSL_IR_LOAD:
-         case HLSL_IR_RESOURCE_LOAD:
-         case HLSL_IR_SWIZZLE:
-+        case HLSL_IR_SAMPLER_STATE:
-             if (list_empty(&instr->uses))
-             {
-                 list_remove(&instr->entry);
-@@ -4088,6 +4164,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
-             /* Stateblock constants should not appear in the shader program. */
-             vkd3d_unreachable();
-+            /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */
-+            vkd3d_unreachable();
-     }
-     return false;
-@@ -4106,7 +4185,7 @@ static void dump_function(struct rb_entry *entry, void *context)
-     }
- }
--static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
-+static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
-         struct hlsl_ir_node *instr)
- {
-     if (!deref->rel_offset.node)
-@@ -4119,6 +4198,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
-     return true;
- }
-+static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
-+    struct hlsl_scope *scope;
-+    struct hlsl_ir_var *var;
-+    LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
-+    {
-+        LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
-+            var->indexable = false;
-+    }
-+    transform_derefs(ctx, mark_indexable_var, &entry_func->body);
- static char get_regset_name(enum hlsl_regset regset)
- {
-     switch (regset)
-@@ -4135,11 +4228,11 @@ static char get_regset_name(enum hlsl_regset regset)
-     vkd3d_unreachable();
- }
--static void allocate_register_reservations(struct hlsl_ctx *ctx)
-+static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars)
- {
-     struct hlsl_ir_var *var;
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+    LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry)
-     {
-         const struct hlsl_reg_reservation *reservation = &var->reg_reservation;
-         unsigned int r;
-@@ -4213,6 +4306,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
-             /* Stateblock constants should not appear in the shader program. */
-             vkd3d_unreachable();
-+            /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */
-+            vkd3d_unreachable();
-         case HLSL_IR_STORE:
-         {
-@@ -4337,10 +4433,22 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
-         case HLSL_IR_CONSTANT:
-             break;
-+        case HLSL_IR_COMPILE:
-+        case HLSL_IR_SAMPLER_STATE:
-+            /* These types are skipped as they are only relevant to effects. */
-+            break;
-         }
-     }
- }
-+static void init_var_liveness(struct hlsl_ir_var *var)
-+    if (var->is_uniform || var->is_input_semantic)
-+        var->first_write = 1;
-+    else if (var->is_output_semantic)
-+        var->last_read = UINT_MAX;
- static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
- {
-     struct hlsl_scope *scope;
-@@ -4355,16 +4463,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
-     }
-     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
--    {
--        if (var->is_uniform || var->is_input_semantic)
--            var->first_write = 1;
--        else if (var->is_output_semantic)
--            var->last_read = UINT_MAX;
--    }
-+        init_var_liveness(var);
-+    LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
-+        init_var_liveness(var);
-     compute_liveness_recurse(&entry_func->body, 0, 0);
- }
-+static void mark_vars_usage(struct hlsl_ctx *ctx)
-+    struct hlsl_scope *scope;
-+    struct hlsl_ir_var *var;
-+    LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
-+    {
-+        LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
-+        {
-+            if (var->last_read)
-+                var->is_read = true;
-+        }
-+    }
- struct register_allocator
- {
-     struct allocation
-@@ -4372,6 +4493,9 @@ struct register_allocator
-         uint32_t reg;
-         unsigned int writemask;
-         unsigned int first_write, last_read;
-+        /* Two allocations with different mode can't share the same register. */
-+        int mode;
-     } *allocations;
-     size_t count, capacity;
-@@ -4381,10 +4505,17 @@ struct register_allocator
-     /* Total number of registers allocated so far. Used to declare sm4 temp count. */
-     uint32_t reg_count;
-+    /* Special flag so allocations that can share registers prioritize those
-+     * that will result in smaller writemasks.
-+     * For instance, a single-register allocation would prefer to share a register
-+     * whose .xy components are already allocated (becoming .z) instead of a
-+     * register whose .xyz components are already allocated (becoming .w). */
-+    bool prioritize_smaller_writemasks;
- };
- static unsigned int get_available_writemask(const struct register_allocator *allocator,
--        unsigned int first_write, unsigned int last_read, uint32_t reg_idx)
-+        unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode)
- {
-     unsigned int writemask = VKD3DSP_WRITEMASK_ALL;
-     size_t i;
-@@ -4399,7 +4530,11 @@ static unsigned int get_available_writemask(const struct register_allocator *all
-         if (allocation->reg == reg_idx
-                 && first_write < allocation->last_read && last_read > allocation->first_write)
-+        {
-             writemask &= ~allocation->writemask;
-+            if (allocation->mode != mode)
-+                writemask = 0;
-+        }
-         if (!writemask)
-             break;
-@@ -4408,8 +4543,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all
-     return writemask;
- }
--static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator,
--        uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read)
-+static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx,
-+        unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode)
- {
-     struct allocation *allocation;
-@@ -4422,6 +4557,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a
-     allocation->writemask = writemask;
-     allocation->first_write = first_write;
-     allocation->last_read = last_read;
-+    allocation->mode = mode;
-     allocator->reg_count = max(allocator->reg_count, reg_idx + 1);
- }
-@@ -4431,37 +4567,46 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a
-  * register, even if they don't use it completely. */
- static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator,
-         unsigned int first_write, unsigned int last_read, unsigned int reg_size,
--        unsigned int component_count)
-+        unsigned int component_count, int mode, bool force_align)
- {
--    struct hlsl_reg ret = {0};
--    unsigned int writemask;
--    uint32_t reg_idx;
-+    struct hlsl_reg ret = {.allocation_size = 1, .allocated = true};
-+    unsigned int required_size = force_align ? 4 : reg_size;
-+    unsigned int pref;
-     VKD3D_ASSERT(component_count <= reg_size);
--    for (reg_idx = 0;; ++reg_idx)
-+    pref = allocator->prioritize_smaller_writemasks ? 4 : required_size;
-+    for (; pref >= required_size; --pref)
-     {
--        writemask = get_available_writemask(allocator, first_write, last_read, reg_idx);
--        if (vkd3d_popcount(writemask) >= reg_size)
-+        for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx)
-         {
--            writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1);
--            break;
-+            unsigned int available_writemask = get_available_writemask(allocator,
-+                    first_write, last_read, reg_idx, mode);
-+            if (vkd3d_popcount(available_writemask) >= pref)
-+            {
-+                unsigned int writemask = hlsl_combine_writemasks(available_writemask,
-+                        vkd3d_write_mask_from_component_count(reg_size));
-+                ret.id = reg_idx;
-+                ret.writemask = hlsl_combine_writemasks(writemask,
-+                        vkd3d_write_mask_from_component_count(component_count));
-+                record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode);
-+                return ret;
-+            }
-         }
-     }
--    record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read);
--    ret.id = reg_idx;
--    ret.allocation_size = 1;
--    ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1);
--    ret.allocated = true;
-+    ret.id = allocator->reg_count;
-+    ret.writemask = vkd3d_write_mask_from_component_count(component_count);
-+    record_allocation(ctx, allocator, allocator->reg_count,
-+            vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode);
-     return ret;
- }
- /* Allocate a register with writemask, while reserving reg_writemask. */
- static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator,
--        unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask)
-+        unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode)
- {
-     struct hlsl_reg ret = {0};
-     uint32_t reg_idx;
-@@ -4470,11 +4615,12 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct
-     for (reg_idx = 0;; ++reg_idx)
-     {
--        if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask)
-+        if ((get_available_writemask(allocator, first_write, last_read,
-+                reg_idx, mode) & reg_writemask) == reg_writemask)
-             break;
-     }
--    record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read);
-+    record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode);
-     ret.id = reg_idx;
-     ret.allocation_size = 1;
-@@ -4483,8 +4629,8 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct
-     return ret;
- }
--static bool is_range_available(const struct register_allocator *allocator,
--        unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size)
-+static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write,
-+        unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode)
- {
-     unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1;
-     unsigned int writemask;
-@@ -4492,18 +4638,18 @@ static bool is_range_available(const struct register_allocator *allocator,
-     for (i = 0; i < (reg_size / 4); ++i)
-     {
--        writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i);
-+        writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode);
-         if (writemask != VKD3DSP_WRITEMASK_ALL)
-             return false;
-     }
--    writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4));
-+    writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode);
-     if ((writemask & last_reg_mask) != last_reg_mask)
-         return false;
-     return true;
- }
- static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
--        unsigned int first_write, unsigned int last_read, unsigned int reg_size)
-+        unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode)
- {
-     struct hlsl_reg ret = {0};
-     uint32_t reg_idx;
-@@ -4511,14 +4657,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo
-     for (reg_idx = 0;; ++reg_idx)
-     {
--        if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size))
-+        if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode))
-             break;
-     }
-     for (i = 0; i < reg_size / 4; ++i)
--        record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read);
-+        record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode);
-     if (reg_size % 4)
--        record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, first_write, last_read);
-+        record_allocation(ctx, allocator, reg_idx + (reg_size / 4),
-+                (1u << (reg_size % 4)) - 1, first_write, last_read, mode);
-     ret.id = reg_idx;
-     ret.allocation_size = align(reg_size, 4) / 4;
-@@ -4534,9 +4681,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx,
-     /* FIXME: We could potentially pack structs or arrays more efficiently... */
-     if (type->class <= HLSL_CLASS_VECTOR)
--        return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx);
-+        return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false);
-     else
--        return allocate_range(ctx, allocator, first_write, last_read, reg_size);
-+        return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0);
- }
- static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type)
-@@ -4715,7 +4862,7 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
-     if (reg_writemask)
-         instr->reg = allocate_register_with_masks(ctx, allocator,
--                instr->index, instr->last_read, reg_writemask, dst_writemask);
-+                instr->index, instr->last_read, reg_writemask, dst_writemask, 0);
-     else
-         instr->reg = allocate_numeric_registers_for_type(ctx, allocator,
-                 instr->index, instr->last_read, instr->data_type);
-@@ -4816,7 +4963,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx,
-     }
- }
--static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f)
-+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f,
-+        const struct vkd3d_shader_location *loc)
- {
-     struct hlsl_constant_defs *defs = &ctx->constant_defs;
-     struct hlsl_constant_register *reg;
-@@ -4838,6 +4986,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index,
-     memset(reg, 0, sizeof(*reg));
-     reg->index = component_index / 4;
-     reg->value.f[component_index % 4] = f;
-+    reg->loc = *loc;
- }
- static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
-@@ -4898,7 +5047,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
-                             vkd3d_unreachable();
-                     }
--                    record_constant(ctx, constant->reg.id * 4 + x, f);
-+                    record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc);
-                 }
-                 break;
-@@ -4991,17 +5140,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl
-             ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
-             TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type));
--            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f);
--            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f);
--            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2,  2.60416674e-03f);
--            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3,  2.60416680e-04f);
-+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc);
-+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc);
-+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2,  2.60416674e-03f, &instr->loc);
-+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3,  2.60416680e-04f, &instr->loc);
-             ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
-             TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type));
--            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f);
--            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f);
--            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2,  1.00000000e+00f);
--            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3,  5.00000000e-01f);
-+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc);
-+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc);
-+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2,  1.00000000e+00f, &instr->loc);
-+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3,  5.00000000e-01f, &instr->loc);
-             return;
-         }
-@@ -5034,14 +5183,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
-             {
-                 if (i < bind_count)
-                 {
--                    if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL)
-+                    if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL)
-                     {
-                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
-                                 "Overlapping register() reservations on 'c%u'.", reg_idx + i);
-                     }
--                    record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX);
-+                    record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0);
-                 }
--                record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX);
-+                record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0);
-             }
-             var->regs[HLSL_REGSET_NUMERIC].id = reg_idx;
-@@ -5064,7 +5213,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
-         if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
-         {
--            var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size);
-+            var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0);
-             TRACE("Allocated %s to %s.\n", var->name,
-                     debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
-         }
-@@ -5081,9 +5230,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
-  * index to all (simultaneously live) variables or intermediate values. Agnostic
-  * as to how many registers are actually available for the current backend, and
-  * does not handle constants. */
--static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
-+static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
- {
-     struct register_allocator allocator = {0};
-+    struct hlsl_scope *scope;
-+    struct hlsl_ir_var *var;
-+    /* Reset variable temp register allocations. */
-+    LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
-+    {
-+        LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
-+        {
-+            if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform))
-+                memset(var->regs, 0, sizeof(var->regs));
-+        }
-+    }
-     /* ps_1_* outputs are special and go in temp register 0. */
-     if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
-@@ -5092,22 +5253,53 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
-         for (i = 0; i < entry_func->parameters.count; ++i)
-         {
--            const struct hlsl_ir_var *var = entry_func->parameters.vars[i];
-+            var = entry_func->parameters.vars[i];
-             if (var->is_output_semantic)
-             {
--                record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read);
-+                record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0);
-                 break;
-             }
-         }
-     }
-     allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
--    ctx->temp_count = allocator.reg_count;
-     vkd3d_free(allocator.allocations);
-+    return allocator.reg_count;
-+enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers)
-+    unsigned int i;
-+    static const struct
-+    {
-+        unsigned int modifiers;
-+        enum vkd3d_shader_interpolation_mode mode;
-+    }
-+    modes[] =
-+    {
-+    };
-+    if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION)
-+            || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT)
-+        return VKD3DSIM_CONSTANT;
-+    for (i = 0; i < ARRAY_SIZE(modes); ++i)
-+    {
-+        if ((storage_modifiers & modes[i].modifiers) == modes[i].modifiers)
-+            return modes[i].mode;
-+    }
-+    return VKD3DSIM_LINEAR;
- }
--static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
-+static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
-+        struct register_allocator *allocator, bool output, bool optimize, bool is_patch_constant_func)
- {
-     static const char *const shader_names[] =
-     {
-@@ -5120,27 +5312,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
-     };
-     enum vkd3d_shader_register_type type;
-+    struct vkd3d_shader_version version;
-     uint32_t reg;
-     bool builtin;
-     VKD3D_ASSERT(var->semantic.name);
--    if (ctx->profile->major_version < 4)
-+    version.major = ctx->profile->major_version;
-+    version.minor = ctx->profile->minor_version;
-+    version.type = ctx->profile->type;
-+    if (version.major < 4)
-     {
--        struct vkd3d_shader_version version;
--        D3DDECLUSAGE usage;
-+        enum vkd3d_decl_usage usage;
-         uint32_t usage_idx;
-         /* ps_1_* outputs are special and go in temp register 0. */
--        if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
-+        if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL)
-             return;
--        version.major = ctx->profile->major_version;
--        version.minor = ctx->profile->minor_version;
--        version.type = ctx->profile->type;
--        builtin = hlsl_sm1_register_from_semantic(&version,
-+        builtin = sm1_register_from_semantic_name(&version,
-                 var->semantic.name, var->semantic.index, output, &type, &reg);
--        if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx))
-+        if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx))
-         {
-             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
-                     "Invalid semantic '%s'.", var->semantic.name);
-@@ -5152,50 +5345,72 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
-     }
-     else
-     {
--        D3D_NAME usage;
-+        enum vkd3d_shader_sysval_semantic semantic;
-         bool has_idx;
--        if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage))
-+        if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping,
-+                ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func))
-         {
-             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
-                     "Invalid semantic '%s'.", var->semantic.name);
-             return;
-         }
--        if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx)))
-+        if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx)))
-             reg = has_idx ? var->semantic.index : 0;
-+        if (semantic == VKD3D_SHADER_SV_TESS_FACTOR_TRIINT)
-+        {
-+            /* While SV_InsideTessFactor can be declared as 'float' for "tri"
-+             * domains, it is allocated as if it was 'float[1]'. */
-+            var->force_align = true;
-+        }
-     }
-     if (builtin)
-     {
--        TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type],
-+        TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type],
-                 output ? "output" : "input", var->semantic.name, var->semantic.index, type, reg);
-     }
-     else
-     {
--        var->regs[HLSL_REGSET_NUMERIC].allocated = true;
--        var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++;
--        var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1;
--        var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1;
--        TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v',
--                var->regs[HLSL_REGSET_NUMERIC], var->data_type));
-+        int mode = (ctx->profile->major_version < 4)
-+                ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
-+        unsigned int reg_size = optimize ? var->data_type->dimx : 4;
-+        var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1,
-+                UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align);
-+        TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v',
-+                var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode);
-     }
- }
--static void allocate_semantic_registers(struct hlsl_ctx *ctx)
-+static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
- {
--    unsigned int input_counter = 0, output_counter = 0;
-+    struct register_allocator input_allocator = {0}, output_allocator = {0};
-+    bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX;
-+    bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL;
-+    bool is_patch_constant_func = entry_func == ctx->patch_constant_func;
-     struct hlsl_ir_var *var;
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+    input_allocator.prioritize_smaller_writemasks = true;
-+    output_allocator.prioritize_smaller_writemasks = true;
-+    LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
-     {
-         if (var->is_input_semantic)
--            allocate_semantic_register(ctx, var, &input_counter, false);
-+            allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader, is_patch_constant_func);
-         if (var->is_output_semantic)
--            allocate_semantic_register(ctx, var, &output_counter, true);
-+            allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader, is_patch_constant_func);
-     }
-+    vkd3d_free(input_allocator.allocations);
-+    vkd3d_free(output_allocator.allocations);
- }
--static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index)
-+static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx,
-+        uint32_t space, uint32_t index, bool allocated_only)
- {
-     const struct hlsl_buffer *buffer;
-@@ -5203,7 +5418,12 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3
-     {
-         if (buffer->reservation.reg_type == 'b'
-                 && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index)
-+        {
-+            if (allocated_only && !buffer->reg.allocated)
-+                continue;
-             return buffer;
-+        }
-     }
-     return NULL;
- }
-@@ -5260,7 +5480,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va
-     TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name);
-     buffer->size = max(buffer->size, var->buffer_offset + var_reg_size);
--    if (var->last_read)
-+    if (var->is_read)
-         buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size);
- }
-@@ -5386,8 +5606,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
-             if (reservation->reg_type == 'b')
-             {
--                const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx,
--                        reservation->reg_space, reservation->reg_index);
-+                const struct hlsl_buffer *allocated_buffer = get_reserved_buffer(ctx,
-+                        reservation->reg_space, reservation->reg_index, true);
-                 unsigned int max_index = get_max_cbuffer_reg_index(ctx);
-                 if (buffer->reservation.reg_index > max_index)
-@@ -5395,14 +5615,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
-                             "Buffer reservation cb%u exceeds target's maximum (cb%u).",
-                             buffer->reservation.reg_index, max_index);
--                if (reserved_buffer && reserved_buffer != buffer)
-+                if (allocated_buffer && allocated_buffer != buffer)
-                 {
-                     hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS,
-                             "Multiple buffers bound to space %u, index %u.",
-                             reservation->reg_space, reservation->reg_index);
--                    hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR,
-+                    hlsl_note(ctx, &allocated_buffer->loc, VKD3D_SHADER_LOG_ERROR,
-                             "Buffer %s is already bound to space %u, index %u.",
--                            reserved_buffer->name, reservation->reg_space, reservation->reg_index);
-+                            allocated_buffer->name, reservation->reg_space, reservation->reg_index);
-                 }
-                 buffer->reg.space = reservation->reg_space;
-@@ -5419,12 +5639,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
-             else if (!reservation->reg_type)
-             {
-                 unsigned int max_index = get_max_cbuffer_reg_index(ctx);
--                while (get_reserved_buffer(ctx, 0, index))
-+                while (get_reserved_buffer(ctx, 0, index, false))
-                     ++index;
-                 if (index > max_index)
-                     hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
--                        "Too many buffers allocated, target's maximum is %u.", max_index);
-+                        "Too many buffers reserved, target's maximum is %u.", max_index);
-                 buffer->reg.space = 0;
-                 buffer->reg.index = index;
-@@ -5491,15 +5711,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum
-     return NULL;
- }
--static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset)
-+static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset)
- {
-     char regset_name = get_regset_name(regset);
-     uint32_t min_index = 0, id = 0;
-     struct hlsl_ir_var *var;
--    if (regset == HLSL_REGSET_UAVS)
-+    if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
-     {
--        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+        LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
-         {
-             if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color")
-                     || !ascii_strcasecmp(var->semantic.name, "sv_target")))
-@@ -5786,6 +6006,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere
-     return ret;
- }
-+static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i)
-+    const struct hlsl_ir_node *instr = attr->args[i].node;
-+    const struct hlsl_type *type = instr->data_type;
-+    if (type->class != HLSL_CLASS_STRING)
-+    {
-+        struct vkd3d_string_buffer *string;
-+        if ((string = hlsl_type_to_string(ctx, type)))
-+            hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                    "Wrong type for the argument %u of [%s]: expected string, but got %s.",
-+                    i, attr->name, string->buffer);
-+        hlsl_release_string_buffer(ctx, string);
-+        return NULL;
-+    }
-+    return hlsl_ir_string_constant(instr)->string;
- static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
- {
-     unsigned int i;
-@@ -5834,207 +6074,2961 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a
-     }
- }
--static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body)
-+static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
- {
--    struct hlsl_ir_node *instr, *next;
--    struct hlsl_block block;
--    struct list *start;
-+    const char *value;
--    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry)
-+    if (attr->args_count != 1)
-     {
--        if (instr->type == HLSL_IR_IF)
--        {
--            struct hlsl_ir_if *iff = hlsl_ir_if(instr);
-+        hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Expected 1 parameter for [domain] attribute, but got %u.", attr->args_count);
-+        return;
-+    }
--            remove_unreachable_code(ctx, &iff->then_block);
--            remove_unreachable_code(ctx, &iff->else_block);
--        }
--        else if (instr->type == HLSL_IR_LOOP)
--        {
--            struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
-+    if (!(value = get_string_argument_value(ctx, attr, 0)))
-+        return;
--            remove_unreachable_code(ctx, &loop->body);
--        }
--        else if (instr->type == HLSL_IR_SWITCH)
--        {
--            struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
--            struct hlsl_ir_switch_case *c;
-+    if (!strcmp(value, "isoline"))
-+        ctx->domain = VKD3D_TESSELLATOR_DOMAIN_LINE;
-+    else if (!strcmp(value, "tri"))
-+        ctx->domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE;
-+    else if (!strcmp(value, "quad"))
-+        ctx->domain = VKD3D_TESSELLATOR_DOMAIN_QUAD;
-+    else
-+        hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN,
-+                "Invalid tessellator domain \"%s\": expected \"isoline\", \"tri\", or \"quad\".",
-+                value);
--            LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
--            {
--                remove_unreachable_code(ctx, &c->body);
--            }
--        }
--    }
-+static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
-+    const struct hlsl_ir_node *instr;
-+    const struct hlsl_type *type;
-+    const struct hlsl_ir_constant *constant;
--    /* Remove instructions past unconditional jumps. */
--    LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry)
-+    if (attr->args_count != 1)
-     {
--        struct hlsl_ir_jump *jump;
--        if (instr->type != HLSL_IR_JUMP)
--            continue;
-+        hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Expected 1 parameter for [outputcontrolpoints] attribute, but got %u.", attr->args_count);
-+        return;
-+    }
--        jump = hlsl_ir_jump(instr);
--        if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE)
--            continue;
-+    instr = attr->args[0].node;
-+    type = instr->data_type;
--        if (!(start = list_next(&body->instrs, &instr->entry)))
--            break;
-+    if (type->class != HLSL_CLASS_SCALAR
-+            || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT))
-+    {
-+        struct vkd3d_string_buffer *string;
--        hlsl_block_init(&block);
--        list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs));
--        hlsl_block_cleanup(&block);
-+        if ((string = hlsl_type_to_string(ctx, type)))
-+            hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                    "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.",
-+                    string->buffer);
-+        hlsl_release_string_buffer(ctx, string);
-+        return;
-+    }
--        break;
-+    if (instr->type != HLSL_IR_CONSTANT)
-+    {
-+        hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer.");
-+        return;
-     }
-+    constant = hlsl_ir_constant(instr);
-+    if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0)
-+            || constant->value.u[0].u > 32)
-+        hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
-+                "Output control point count must be between 0 and 32.");
-+    ctx->output_control_point_count = constant->value.u[0].u;
- }
--void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
-+static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
- {
--    bool progress;
--    lower_ir(ctx, lower_matrix_swizzles, body);
--    lower_ir(ctx, lower_index_loads, body);
-+    const char *value;
--    lower_ir(ctx, lower_broadcasts, body);
--    while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL));
--    do
-+    if (attr->args_count != 1)
-     {
--        progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL);
--        progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL);
-+        hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Expected 1 parameter for [outputtopology] attribute, but got %u.", attr->args_count);
-+        return;
-     }
--    while (progress);
--    hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
--    lower_ir(ctx, lower_narrowing_casts, body);
--    lower_ir(ctx, lower_int_dot, body);
--    lower_ir(ctx, lower_int_division, body);
--    lower_ir(ctx, lower_int_modulus, body);
--    lower_ir(ctx, lower_int_abs, body);
--    lower_ir(ctx, lower_casts_to_bool, body);
--    lower_ir(ctx, lower_float_modulus, body);
--    hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL);
-+    if (!(value = get_string_argument_value(ctx, attr, 0)))
-+        return;
--    do
--    {
--        progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
--        progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL);
--        progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL);
--        progress |= hlsl_copy_propagation_execute(ctx, body);
--        progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
--        progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
--        progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL);
--    } while (progress);
-+    if (!strcmp(value, "point"))
-+        ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT;
-+    else if (!strcmp(value, "line"))
-+        ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE;
-+    else if (!strcmp(value, "triangle_cw"))
-+        ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW;
-+    else if (!strcmp(value, "triangle_ccw"))
-+        ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
-+    else
-+        hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
-+                "Invalid tessellator output topology \"%s\": "
-+                "expected \"point\", \"line\", \"triangle_cw\", or \"triangle_ccw\".", value);
- }
--static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx,
--        struct vsir_program *program, bool output, struct hlsl_ir_var *var)
-+static void parse_partitioning_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
- {
--    enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE;
--    enum vkd3d_shader_register_type type;
--    struct shader_signature *signature;
--    struct signature_element *element;
--    unsigned int register_index, mask;
-+    const char *value;
--    if ((!output && !var->last_read) || (output && !var->first_write))
-+    if (attr->args_count != 1)
-+    {
-+        hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Expected 1 parameter for [partitioning] attribute, but got %u.", attr->args_count);
-+        return;
-+    }
-+    if (!(value = get_string_argument_value(ctx, attr, 0)))
-+        return;
-+    if (!strcmp(value, "integer"))
-+    else if (!strcmp(value, "pow2"))
-+        ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2;
-+    else if (!strcmp(value, "fractional_even"))
-+    else if (!strcmp(value, "fractional_odd"))
-+    else
-+        hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING,
-+                "Invalid tessellator partitioning \"%s\": "
-+                "expected \"integer\", \"pow2\", \"fractional_even\", or \"fractional_odd\".", value);
-+static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
-+    const char *name;
-+    struct hlsl_ir_function *func;
-+    struct hlsl_ir_function_decl *decl;
-+    if (attr->args_count != 1)
-+    {
-+        hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
-+                "Expected 1 parameter for [patchconstantfunc] attribute, but got %u.", attr->args_count);
-+        return;
-+    }
-+    if (!(name = get_string_argument_value(ctx, attr, 0)))
-+        return;
-+    ctx->patch_constant_func = NULL;
-+    if ((func = hlsl_get_function(ctx, name)))
-+    {
-+        /* Pick the last overload with a body. */
-+        LIST_FOR_EACH_ENTRY_REV(decl, &func->overloads, struct hlsl_ir_function_decl, entry)
-+        {
-+            if (decl->has_body)
-+            {
-+                ctx->patch_constant_func = decl;
-+                break;
-+            }
-+        }
-+    }
-+    if (!ctx->patch_constant_func)
-+        hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
-+                "Patch constant function \"%s\" is not defined.", name);
-+static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
-+    const struct hlsl_profile_info *profile = ctx->profile;
-+    unsigned int i;
-+    for (i = 0; i < entry_func->attr_count; ++i)
-+    {
-+        const struct hlsl_attribute *attr = entry_func->attrs[i];
-+        if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE)
-+            parse_numthreads_attribute(ctx, attr);
-+        else if (!strcmp(attr->name, "domain")
-+                    && (profile->type == VKD3D_SHADER_TYPE_HULL || profile->type == VKD3D_SHADER_TYPE_DOMAIN))
-+            parse_domain_attribute(ctx, attr);
-+        else if (!strcmp(attr->name, "outputcontrolpoints") && profile->type == VKD3D_SHADER_TYPE_HULL)
-+            parse_outputcontrolpoints_attribute(ctx, attr);
-+        else if (!strcmp(attr->name, "outputtopology") && profile->type == VKD3D_SHADER_TYPE_HULL)
-+            parse_outputtopology_attribute(ctx, attr);
-+        else if (!strcmp(attr->name, "partitioning") && profile->type == VKD3D_SHADER_TYPE_HULL)
-+            parse_partitioning_attribute(ctx, attr);
-+        else if (!strcmp(attr->name, "patchconstantfunc") && profile->type == VKD3D_SHADER_TYPE_HULL)
-+            parse_patchconstantfunc_attribute(ctx, attr);
-+        else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL)
-+            entry_func->early_depth_test = true;
-+        else
-+            hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE,
-+                    "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name);
-+    }
-+static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func)
-+    if (ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID)
-+    {
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name);
-+    }
-+    if (ctx->output_control_point_count == UINT_MAX)
-+    {
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [outputcontrolpoints] attribute.", entry_func->func->name);
-+    }
-+    if (!ctx->output_primitive)
-+    {
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [outputtopology] attribute.", entry_func->func->name);
-+    }
-+    if (!ctx->partitioning)
-+    {
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [partitioning] attribute.", entry_func->func->name);
-+    }
-+    if (!ctx->patch_constant_func)
-+    {
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [patchconstantfunc] attribute.", entry_func->func->name);
-+    }
-+    else if (ctx->patch_constant_func == entry_func)
-+    {
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL,
-+                "Patch constant function cannot be the entry point function.");
-+        /* Native returns E_NOTIMPL instead of E_FAIL here. */
-+        ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED;
-+        return;
-+    }
-+    switch (ctx->domain)
-+    {
-+            if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW
-+                    || ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW)
-+                hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
-+                        "Triangle output topologies are not available for isoline domains.");
-+            break;
-+            if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE)
-+                hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
-+                        "Line output topologies are not available for triangle domains.");
-+            break;
-+            if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE)
-+                hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
-+                        "Line output topologies are not available for quad domains.");
-+            break;
-+        default:
-+            break;
-+    }
-+static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body)
-+    struct hlsl_ir_node *instr, *next;
-+    struct hlsl_block block;
-+    struct list *start;
-+    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry)
-+    {
-+        if (instr->type == HLSL_IR_IF)
-+        {
-+            struct hlsl_ir_if *iff = hlsl_ir_if(instr);
-+            remove_unreachable_code(ctx, &iff->then_block);
-+            remove_unreachable_code(ctx, &iff->else_block);
-+        }
-+        else if (instr->type == HLSL_IR_LOOP)
-+        {
-+            struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
-+            remove_unreachable_code(ctx, &loop->body);
-+        }
-+        else if (instr->type == HLSL_IR_SWITCH)
-+        {
-+            struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
-+            struct hlsl_ir_switch_case *c;
-+            LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
-+            {
-+                remove_unreachable_code(ctx, &c->body);
-+            }
-+        }
-+    }
-+    /* Remove instructions past unconditional jumps. */
-+    LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry)
-+    {
-+        struct hlsl_ir_jump *jump;
-+        if (instr->type != HLSL_IR_JUMP)
-+            continue;
-+        jump = hlsl_ir_jump(instr);
-+        if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE)
-+            continue;
-+        if (!(start = list_next(&body->instrs, &instr->entry)))
-+            break;
-+        hlsl_block_init(&block);
-+        list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs));
-+        hlsl_block_cleanup(&block);
-+        break;
-+    }
-+void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body)
-+    lower_ir(ctx, lower_index_loads, body);
-+void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
-+    bool progress;
-+    lower_ir(ctx, lower_matrix_swizzles, body);
-+    lower_ir(ctx, lower_broadcasts, body);
-+    while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL));
-+    do
-+    {
-+        progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL);
-+        progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL);
-+    }
-+    while (progress);
-+    hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
-+    lower_ir(ctx, lower_narrowing_casts, body);
-+    lower_ir(ctx, lower_int_dot, body);
-+    lower_ir(ctx, lower_int_division, body);
-+    lower_ir(ctx, lower_int_modulus, body);
-+    lower_ir(ctx, lower_int_abs, body);
-+    lower_ir(ctx, lower_casts_to_bool, body);
-+    lower_ir(ctx, lower_float_modulus, body);
-+    hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL);
-+    do
-+    {
-+        progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
-+        progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL);
-+        progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL);
-+        progress |= hlsl_copy_propagation_execute(ctx, body);
-+        progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
-+        progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
-+        progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL);
-+    } while (progress);
-+static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var)
-+    enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE;
-+    enum vkd3d_shader_component_type component_type;
-+    unsigned int register_index, mask, use_mask;
-+    const char *name = var->semantic.name;
-+    enum vkd3d_shader_register_type type;
-+    struct signature_element *element;
-+    if (hlsl_version_ge(ctx, 4, 0))
-+    {
-+        struct vkd3d_string_buffer *string;
-+        bool has_idx, ret;
-+        ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping,
-+                ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
-+        VKD3D_ASSERT(ret);
-+        if (sysval == ~0u)
-+            return;
-+        if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx))
-+        {
-+            register_index = has_idx ? var->semantic.index : ~0u;
-+            mask = (1u << var->data_type->dimx) - 1;
-+        }
-+        else
-+        {
-+            VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
-+            register_index = var->regs[HLSL_REGSET_NUMERIC].id;
-+            mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
-+        }
-+        use_mask = mask; /* FIXME: retrieve use mask accurately. */
-+        switch (var->data_type->e.numeric.type)
-+        {
-+            case HLSL_TYPE_FLOAT:
-+            case HLSL_TYPE_HALF:
-+                component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+                break;
-+            case HLSL_TYPE_INT:
-+                component_type = VKD3D_SHADER_COMPONENT_INT;
-+                break;
-+            case HLSL_TYPE_BOOL:
-+            case HLSL_TYPE_UINT:
-+                component_type = VKD3D_SHADER_COMPONENT_UINT;
-+                break;
-+            default:
-+                if ((string = hlsl_type_to_string(ctx, var->data_type)))
-+                    hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                            "Invalid data type %s for semantic variable %s.", string->buffer, var->name);
-+                hlsl_release_string_buffer(ctx, string);
-+                component_type = VKD3D_SHADER_COMPONENT_VOID;
-+                break;
-+        }
-+        if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color"))
-+            name = "SV_Target";
-+        else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth"))
-+            name ="SV_Depth";
-+        else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position"))
-+            name = "SV_Position";
-+    }
-+    else
-+    {
-+        if ((!output && !var->last_read) || (output && !var->first_write))
-+            return;
-+        if (!sm1_register_from_semantic_name(&program->shader_version,
-+                var->semantic.name, var->semantic.index, output, &type, &register_index))
-+        {
-+            enum vkd3d_decl_usage usage;
-+            unsigned int usage_idx;
-+            bool ret;
-+            register_index = var->regs[HLSL_REGSET_NUMERIC].id;
-+            ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx);
-+            VKD3D_ASSERT(ret);
-+            /* With the exception of vertex POSITION output, none of these are
-+             * system values. Pixel POSITION input is not equivalent to
-+             * SV_Position; the closer equivalent is VPOS, which is not declared
-+             * as a semantic. */
-+            if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX
-+                    && output && usage == VKD3D_DECL_USAGE_POSITION)
-+                sysval = VKD3D_SHADER_SV_POSITION;
-+        }
-+        mask = (1 << var->data_type->dimx) - 1;
-+        if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output
-+                && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
-+        {
-+            if (var->data_type->dimx > 1)
-+                hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
-+                        "PSIZE output must have only 1 component in this shader model.");
-+            /* For some reason the writemask has all components set. */
-+            mask = VKD3DSP_WRITEMASK_ALL;
-+        }
-+        if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3
-+                && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1)
-+            hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
-+                    "FOG output must have only 1 component in this shader model.");
-+        use_mask = mask; /* FIXME: retrieve use mask accurately. */
-+        component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+    }
-+    if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity,
-+            signature->element_count + 1, sizeof(*signature->elements)))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+    element = &signature->elements[signature->element_count++];
-+    memset(element, 0, sizeof(*element));
-+    if (!(element->semantic_name = vkd3d_strdup(name)))
-+    {
-+        --signature->element_count;
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+    element->semantic_index = var->semantic.index;
-+    element->sysval_semantic = sysval;
-+    element->component_type = component_type;
-+    element->register_index = register_index;
-+    element->target_location = register_index;
-+    element->register_count = 1;
-+    element->mask = mask;
-+    element->used_mask = use_mask;
-+    if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output)
-+        element->interpolation_mode = VKD3DSIM_LINEAR;
-+static void generate_vsir_signature(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_function_decl *func)
-+    bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN;
-+    bool is_patch_constant_func = func == ctx->patch_constant_func;
-+    struct hlsl_ir_var *var;
-+    LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
-+    {
-+        if (var->is_input_semantic)
-+        {
-+            if (is_patch_constant_func)
-+                generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, true, var);
-+            else if (is_domain)
-+                generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, false, var);
-+            else
-+                generate_vsir_signature_entry(ctx, program, &program->input_signature, false, false, var);
-+        }
-+        if (var->is_output_semantic)
-+        {
-+            if (is_patch_constant_func)
-+                generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, true, var);
-+            else
-+                generate_vsir_signature_entry(ctx, program, &program->output_signature, true, false, var);
-+        }
-+    }
-+static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type)
-+    if (hlsl_version_lt(ctx, 4, 0))
-+        return VKD3D_DATA_FLOAT;
-+    if (type->class == HLSL_CLASS_ARRAY)
-+        return vsir_data_type_from_hlsl_type(ctx, type->e.array.type);
-+    if (type->class == HLSL_CLASS_STRUCT)
-+        return VKD3D_DATA_MIXED;
-+    if (type->class <= HLSL_CLASS_LAST_NUMERIC)
-+    {
-+        switch (type->e.numeric.type)
-+        {
-+            case HLSL_TYPE_DOUBLE:
-+                return VKD3D_DATA_DOUBLE;
-+            case HLSL_TYPE_FLOAT:
-+                return VKD3D_DATA_FLOAT;
-+            case HLSL_TYPE_HALF:
-+                return VKD3D_DATA_HALF;
-+            case HLSL_TYPE_INT:
-+                return VKD3D_DATA_INT;
-+            case HLSL_TYPE_UINT:
-+            case HLSL_TYPE_BOOL:
-+                return VKD3D_DATA_UINT;
-+        }
-+    }
-+    vkd3d_unreachable();
-+static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx,
-+        const struct hlsl_ir_node *instr)
-+    return vsir_data_type_from_hlsl_type(ctx, instr->data_type);
-+static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask)
-+    uint32_t swizzle;
-+    swizzle = hlsl_swizzle_from_writemask(src_writemask);
-+    swizzle = hlsl_map_swizzle(swizzle, dst_writemask);
-+    swizzle = vsir_swizzle_from_hlsl(swizzle);
-+    return swizzle;
-+static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct hlsl_block *block)
-+    struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int i, x;
-+    for (i = 0; i < ctx->constant_defs.count; ++i)
-+    {
-+        const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i];
-+        if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
-+        {
-+            ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+            return;
-+        }
-+        ins = &instructions->elements[instructions->count];
-+        if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VKD3DSIH_DEF, 1, 1))
-+        {
-+            ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+            return;
-+        }
-+        ++instructions->count;
-+        dst_param = &ins->dst[0];
-+        vsir_register_init(&dst_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
-+        ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+        ins->dst[0].reg.idx[0].offset = constant_reg->index;
-+        ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
-+        src_param = &ins->src[0];
-+        vsir_register_init(&src_param->reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
-+        src_param->reg.type = VKD3DSPR_IMMCONST;
-+        src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
-+        src_param->reg.non_uniform = false;
-+        src_param->reg.data_type = VKD3D_DATA_FLOAT;
-+        src_param->reg.dimension = VSIR_DIMENSION_VEC4;
-+        for (x = 0; x < 4; ++x)
-+            src_param->reg.u.immconst_f32[x] = constant_reg->value.f[x];
-+        src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+    }
-+static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_block *block)
-+    struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    enum vkd3d_shader_resource_type resource_type;
-+    struct vkd3d_shader_register_range *range;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_semantic *semantic;
-+    struct vkd3d_shader_instruction *ins;
-+    enum hlsl_sampler_dim sampler_dim;
-+    struct hlsl_ir_var *var;
-+    unsigned int i, count;
-+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+    {
-+        if (!var->regs[HLSL_REGSET_SAMPLERS].allocated)
-+            continue;
-+        count = var->bind_count[HLSL_REGSET_SAMPLERS];
-+        for (i = 0; i < count; ++i)
-+        {
-+            if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
-+            {
-+                sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim;
-+                switch (sampler_dim)
-+                {
-+                    case HLSL_SAMPLER_DIM_2D:
-+                        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-+                        break;
-+                    case HLSL_SAMPLER_DIM_CUBE:
-+                        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_CUBE;
-+                        break;
-+                    case HLSL_SAMPLER_DIM_3D:
-+                        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_3D;
-+                        break;
-+                    case HLSL_SAMPLER_DIM_GENERIC:
-+                        /* These can appear in sm4-style combined sample instructions. */
-+                        hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered.");
-+                        continue;
-+                    default:
-+                        vkd3d_unreachable();
-+                        break;
-+                }
-+                if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
-+                {
-+                    ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+                    return;
-+                }
-+                ins = &instructions->elements[instructions->count];
-+                if (!vsir_instruction_init_with_params(program, ins, &var->loc, VKD3DSIH_DCL, 0, 0))
-+                {
-+                    ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+                    return;
-+                }
-+                ++instructions->count;
-+                semantic = &ins->declaration.semantic;
-+                semantic->resource_type = resource_type;
-+                dst_param = &semantic->resource.reg;
-+                vsir_register_init(&dst_param->reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1);
-+                dst_param->reg.dimension = VSIR_DIMENSION_NONE;
-+                dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i;
-+                dst_param->write_mask = 0;
-+                range = &semantic->resource.range;
-+                range->space = 0;
-+                range->first = range->last = dst_param->reg.idx[0].offset;
-+            }
-+        }
-+    }
-+static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction(
-+        struct hlsl_ctx *ctx, struct vsir_program *program,
-+        const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode,
-+        unsigned int dst_count, unsigned int src_count)
-+    struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    struct vkd3d_shader_instruction *ins;
-+    if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return NULL;
-+    }
-+    ins = &instructions->elements[instructions->count];
-+    if (!vsir_instruction_init_with_params(program, ins, loc, opcode, dst_count, src_count))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return NULL;
-+    }
-+    ++instructions->count;
-+    return ins;
-+static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src,
-+        struct hlsl_ctx *ctx, const struct hlsl_constant_value *value,
-+        enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask)
-+    unsigned int i, j;
-+    vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0);
-+    if (width == 1)
-+    {
-+        src->reg.u.immconst_u32[0] = value->u[0].u;
-+        return;
-+    }
-+    src->reg.dimension = VSIR_DIMENSION_VEC4;
-+    for (i = 0, j = 0; i < 4; ++i)
-+    {
-+        if ((map_writemask & (1u << i)) && (j < width))
-+            src->reg.u.immconst_u32[i] = value->u[j++].u;
-+        else
-+            src->reg.u.immconst_u32[i] = 0;
-+    }
-+static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
-+        struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask)
-+    struct hlsl_ir_constant *constant;
-+    if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT)
-+    {
-+        /* In SM4 constants are inlined */
-+        constant = hlsl_ir_constant(instr);
-+        vsir_src_from_hlsl_constant_value(src, ctx, &constant->value,
-+                vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask);
-+    }
-+    else
-+    {
-+        vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
-+        src->reg.idx[0].offset = instr->reg.id;
-+        src->reg.dimension = VSIR_DIMENSION_VEC4;
-+        src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask);
-+    }
-+static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
-+    const struct hlsl_ir_var *var = deref->var;
-+    unsigned int offset_const_deref;
-+    reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
-+    reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
-+    reg->dimension = VSIR_DIMENSION_VEC4;
-+    VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
-+    if (!var->indexable)
-+    {
-+        offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
-+        reg->idx[0].offset += offset_const_deref / 4;
-+        reg->idx_count = 1;
-+    }
-+    else
-+    {
-+        offset_const_deref = deref->const_offset;
-+        reg->idx[1].offset = offset_const_deref / 4;
-+        reg->idx_count = 2;
-+        if (deref->rel_offset.node)
-+        {
-+            struct vkd3d_shader_src_param *idx_src;
-+            if (!(idx_src = vsir_program_get_src_params(program, 1)))
-+            {
-+                ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+                return false;
-+            }
-+            memset(idx_src, 0, sizeof(*idx_src));
-+            reg->idx[1].rel_addr = idx_src;
-+            vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL);
-+        }
-+    }
-+    *writemask = 0xf & (0xf << (offset_const_deref % 4));
-+    if (var->regs[HLSL_REGSET_NUMERIC].writemask)
-+        *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
-+    return true;
-+static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
-+    const struct vkd3d_shader_version *version = &program->shader_version;
-+    const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref);
-+    const struct hlsl_ir_var *var = deref->var;
-+    if (var->is_uniform)
-+    {
-+        enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
-+        if (regset == HLSL_REGSET_TEXTURES)
-+        {
-+            reg->type = VKD3DSPR_RESOURCE;
-+            reg->dimension = VSIR_DIMENSION_VEC4;
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-+            {
-+                reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
-+                reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
-+                reg->idx_count = 2;
-+            }
-+            else
-+            {
-+                reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index;
-+                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
-+                reg->idx_count = 1;
-+            }
-+            VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES);
-+            *writemask = VKD3DSP_WRITEMASK_ALL;
-+        }
-+        else if (regset == HLSL_REGSET_UAVS)
-+        {
-+            reg->type = VKD3DSPR_UAV;
-+            reg->dimension = VSIR_DIMENSION_VEC4;
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-+            {
-+                reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
-+                reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
-+                reg->idx_count = 2;
-+            }
-+            else
-+            {
-+                reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index;
-+                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
-+                reg->idx_count = 1;
-+            }
-+            VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
-+            *writemask = VKD3DSP_WRITEMASK_ALL;
-+        }
-+        else if (regset == HLSL_REGSET_SAMPLERS)
-+        {
-+            reg->type = VKD3DSPR_SAMPLER;
-+            reg->dimension = VSIR_DIMENSION_NONE;
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-+            {
-+                reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
-+                reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
-+                reg->idx_count = 2;
-+            }
-+            else
-+            {
-+                reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index;
-+                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
-+                reg->idx_count = 1;
-+            }
-+            VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS);
-+            *writemask = VKD3DSP_WRITEMASK_ALL;
-+        }
-+        else
-+        {
-+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
-+            VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
-+            reg->type = VKD3DSPR_CONSTBUFFER;
-+            reg->dimension = VSIR_DIMENSION_VEC4;
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-+            {
-+                reg->idx[0].offset = var->buffer->reg.id;
-+                reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
-+                reg->idx[2].offset = offset / 4;
-+                reg->idx_count = 3;
-+            }
-+            else
-+            {
-+                reg->idx[0].offset = var->buffer->reg.index;
-+                reg->idx[1].offset = offset / 4;
-+                reg->idx_count = 2;
-+            }
-+            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
-+        }
-+    }
-+    else if (var->is_input_semantic)
-+    {
-+        bool has_idx;
-+        if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx))
-+        {
-+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-+            if (has_idx)
-+            {
-+                reg->idx[0].offset = var->semantic.index + offset / 4;
-+                reg->idx_count = 1;
-+            }
-+            if (shader_sm4_is_scalar_register(reg))
-+                reg->dimension = VSIR_DIMENSION_SCALAR;
-+            else
-+                reg->dimension = VSIR_DIMENSION_VEC4;
-+            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-+        }
-+        else
-+        {
-+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-+            VKD3D_ASSERT(hlsl_reg.allocated);
-+            if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
-+                reg->type = VKD3DSPR_PATCHCONST;
-+            else
-+                reg->type = VKD3DSPR_INPUT;
-+            reg->dimension = VSIR_DIMENSION_VEC4;
-+            reg->idx[0].offset = hlsl_reg.id;
-+            reg->idx_count = 1;
-+            *writemask = hlsl_reg.writemask;
-+        }
-+    }
-+    else if (var->is_output_semantic)
-+    {
-+        bool has_idx;
-+        if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx))
-+        {
-+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-+            if (has_idx)
-+            {
-+                reg->idx[0].offset = var->semantic.index + offset / 4;
-+                reg->idx_count = 1;
-+            }
-+            if (shader_sm4_is_scalar_register(reg))
-+                reg->dimension = VSIR_DIMENSION_SCALAR;
-+            else
-+                reg->dimension = VSIR_DIMENSION_VEC4;
-+            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-+        }
-+        else
-+        {
-+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-+            VKD3D_ASSERT(hlsl_reg.allocated);
-+            reg->type = VKD3DSPR_OUTPUT;
-+            reg->dimension = VSIR_DIMENSION_VEC4;
-+            reg->idx[0].offset = hlsl_reg.id;
-+            reg->idx_count = 1;
-+            *writemask = hlsl_reg.writemask;
-+        }
-+    }
-+    else
-+    {
-+        return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref);
-+    }
-+    return true;
-+static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref,
-+        unsigned int dst_writemask, const struct vkd3d_shader_location *loc)
-+    uint32_t writemask;
-+    if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref))
-+        return false;
-+    src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
-+    return true;
-+static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref,
-+        const struct vkd3d_shader_location *loc, unsigned int writemask)
-+    uint32_t reg_writemask;
-+    if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref))
-+        return false;
-+    dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask);
-+    return true;
-+static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
-+        struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
-+    dst->reg.idx[0].offset = instr->reg.id;
-+    dst->reg.dimension = VSIR_DIMENSION_VEC4;
-+    dst->write_mask = instr->reg.writemask;
-+static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_constant *constant)
-+    struct hlsl_ir_node *instr = &constant->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    VKD3D_ASSERT(constant->reg.allocated);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
-+        return;
-+    src_param = &ins->src[0];
-+    vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
-+    src_param->reg.idx[0].offset = constant->reg.id;
-+    src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask);
-+    dst_param = &ins->dst[0];
-+    vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    dst_param->reg.idx[0].offset = instr->reg.id;
-+    dst_param->write_mask = instr->reg.writemask;
-+static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_expr *expr)
-+    struct vkd3d_shader_src_param *src_param;
-+    struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_instruction *ins;
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1)))
-+        return;
-+    ins->flags = VKD3DSI_SAMPLE_INFO_UINT;
-+    vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
-+    src_param = &ins->src[0];
-+    vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0);
-+    src_param->reg.dimension = VSIR_DIMENSION_VEC4;
-+    src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
-+/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */
-+static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode,
-+        uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles)
-+    struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int i, src_count = 0;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    for (i = 0; i < HLSL_MAX_OPERANDS; ++i)
-+    {
-+        if (expr->operands[i].node)
-+            src_count = i + 1;
-+    }
-+    VKD3D_ASSERT(!src_mod || src_count == 1);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
-+        return;
-+    dst_param = &ins->dst[0];
-+    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
-+    dst_param->modifiers = dst_mod;
-+    for (i = 0; i < src_count; ++i)
-+    {
-+        struct hlsl_ir_node *operand = expr->operands[i].node;
-+        src_param = &ins->src[i];
-+        vsir_src_from_hlsl_node(src_param, ctx, operand,
-+                map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL);
-+        src_param->modifiers = src_mod;
-+    }
-+/* Translate ops that have 1 src and need one instruction for each component in
-+ * the d3dbc backend. */
-+static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode)
-+    struct hlsl_ir_node *operand = expr->operands[0].node;
-+    struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    uint32_t src_swizzle;
-+    unsigned int i, c;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    VKD3D_ASSERT(operand);
-+    src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask);
-+    for (i = 0; i < 4; ++i)
-+    {
-+        if (instr->reg.writemask & (1u << i))
-+        {
-+            if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1)))
-+                return;
-+            dst_param = &ins->dst[0];
-+            vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+            dst_param->reg.idx[0].offset = instr->reg.id;
-+            dst_param->write_mask = 1u << i;
-+            src_param = &ins->src[0];
-+            vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+            src_param->reg.idx[0].offset = operand->reg.id;
-+            c = vsir_swizzle_get_component(src_swizzle, i);
-+            src_param->swizzle = vsir_swizzle_from_writemask(1u << c);
-+        }
-+    }
-+static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct hlsl_ir_expr *expr)
-+    struct hlsl_ir_node *operand = expr->operands[0].node;
-+    struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int src_count = 0;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    src_count = (ctx->profile->major_version < 3) ? 3 : 1;
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count)))
-+        return;
-+    dst_param = &ins->dst[0];
-+    vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    dst_param->reg.idx[0].offset = instr->reg.id;
-+    dst_param->write_mask = instr->reg.writemask;
-+    src_param = &ins->src[0];
-+    vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    src_param->reg.idx[0].offset = operand->reg.id;
-+    src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL);
-+    if (ctx->profile->major_version < 3)
-+    {
-+        src_param = &ins->src[1];
-+        vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
-+        src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id;
-+        src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+        src_param = &ins->src[1];
-+        vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
-+        src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id;
-+        src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+    }
-+static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_expr *expr)
-+    const struct hlsl_type *src_type, *dst_type;
-+    const struct hlsl_ir_node *arg1, *instr;
-+    arg1 = expr->operands[0].node;
-+    src_type = arg1->data_type;
-+    instr = &expr->node;
-+    dst_type = instr->data_type;
-+    /* Narrowing casts were already lowered. */
-+    VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
-+    switch (dst_type->e.numeric.type)
-+    {
-+        case HLSL_TYPE_HALF:
-+        case HLSL_TYPE_FLOAT:
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                case HLSL_TYPE_BOOL:
-+                    /* Integrals are internally represented as floats, so no change is necessary.*/
-+                case HLSL_TYPE_HALF:
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_DOUBLE:
-+                    if (ctx->double_as_float_alias)
-+                    {
-+                        generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                        return true;
-+                    }
-+                    hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                            "The 'double' type is not supported for the %s profile.", ctx->profile->name);
-+                    break;
-+                default:
-+                    vkd3d_unreachable();
-+            }
-+            break;
-+        case HLSL_TYPE_INT:
-+        case HLSL_TYPE_UINT:
-+            switch(src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_HALF:
-+                case HLSL_TYPE_FLOAT:
-+                    /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not
-+                     * reach this case unless we are missing something. */
-+                    hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer.");
-+                    break;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                case HLSL_TYPE_BOOL:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_DOUBLE:
-+                    hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer.");
-+                    break;
-+                default:
-+                    vkd3d_unreachable();
-+            }
-+            break;
-+        case HLSL_TYPE_DOUBLE:
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    if (ctx->double_as_float_alias)
-+                    {
-+                        generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                        return true;
-+                    }
-+                    hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
-+                            "The 'double' type is not supported for the %s profile.", ctx->profile->name);
-+                    break;
-+                default:
-+                    hlsl_fixme(ctx, &instr->loc, "SM1 cast to double.");
-+                    break;
-+            }
-+            break;
-+        case HLSL_TYPE_BOOL:
-+            /* Casts to bool should have already been lowered. */
-+        default:
-+            hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.",
-+                debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type));
-+            break;
-+    }
-+    return false;
-+static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct hlsl_ir_expr *expr)
-+    struct hlsl_ir_node *instr = &expr->node;
-+    if (expr->op != HLSL_OP1_REINTERPRET && expr->op != HLSL_OP1_CAST
-+            && instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT)
-+    {
-+        /* These need to be lowered. */
-+        hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression.");
-+        return false;
-+    }
-+    switch (expr->op)
-+    {
-+        case HLSL_OP1_ABS:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true);
-+            break;
-+        case HLSL_OP1_CAST:
-+            return sm1_generate_vsir_instr_expr_cast(ctx, program, expr);
-+        case HLSL_OP1_COS_REDUCED:
-+            VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0);
-+            sm1_generate_vsir_instr_expr_sincos(ctx, program, expr);
-+            break;
-+        case HLSL_OP1_DSX:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
-+            break;
-+        case HLSL_OP1_DSY:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
-+            break;
-+        case HLSL_OP1_EXP2:
-+            sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_EXP);
-+            break;
-+        case HLSL_OP1_LOG2:
-+            sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_LOG);
-+            break;
-+        case HLSL_OP1_NEG:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
-+            break;
-+        case HLSL_OP1_RCP:
-+            sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP);
-+            break;
-+        case HLSL_OP1_REINTERPRET:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+            break;
-+        case HLSL_OP1_RSQ:
-+            sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RSQ);
-+            break;
-+        case HLSL_OP1_SAT:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
-+            break;
-+        case HLSL_OP1_SIN_REDUCED:
-+            VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_1);
-+            sm1_generate_vsir_instr_expr_sincos(ctx, program, expr);
-+            break;
-+        case HLSL_OP2_ADD:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
-+            break;
-+        case HLSL_OP2_DOT:
-+            switch (expr->operands[0].node->data_type->dimx)
-+            {
-+                case 3:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
-+                    break;
-+                case 4:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
-+                    break;
-+                default:
-+                    vkd3d_unreachable();
-+                    return false;
-+            }
-+            break;
-+        case HLSL_OP2_MAX:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
-+            break;
-+        case HLSL_OP2_MIN:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
-+            break;
-+        case HLSL_OP2_MUL:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
-+            break;
-+        case HLSL_OP1_FRACT:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
-+            break;
-+        case HLSL_OP2_LOGIC_AND:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
-+            break;
-+        case HLSL_OP2_LOGIC_OR:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
-+            break;
-+        case HLSL_OP2_SLT:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true);
-+            break;
-+        case HLSL_OP3_CMP:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true);
-+            break;
-+        case HLSL_OP3_DP2ADD:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false);
-+            break;
-+        case HLSL_OP3_MAD:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
-+            break;
-+        default:
-+            hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op));
-+            return false;
-+    }
-+    return true;
-+static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
-+        struct vkd3d_shader_dst_param *dst_param, struct hlsl_deref *deref,
-+        const struct vkd3d_shader_location *loc, unsigned int writemask)
-+    enum vkd3d_shader_register_type type = VKD3DSPR_TEMP;
-+    struct vkd3d_shader_version version;
-+    uint32_t register_index;
-+    struct hlsl_reg reg;
-+    reg = hlsl_reg_from_deref(ctx, deref);
-+    register_index = reg.id;
-+    writemask = hlsl_combine_writemasks(reg.writemask, writemask);
-+    if (deref->var->is_output_semantic)
-+    {
-+        const char *semantic_name = deref->var->semantic.name;
-+        version.major = ctx->profile->major_version;
-+        version.minor = ctx->profile->minor_version;
-+        version.type = ctx->profile->type;
-+        if (version.type == VKD3D_SHADER_TYPE_PIXEL && version.major == 1)
-+        {
-+            type = VKD3DSPR_TEMP;
-+            register_index = 0;
-+        }
-+        else if (!sm1_register_from_semantic_name(&version, semantic_name,
-+                deref->var->semantic.index, true, &type, &register_index))
-+        {
-+            VKD3D_ASSERT(reg.allocated);
-+            type = VKD3DSPR_OUTPUT;
-+            register_index = reg.id;
-+        }
-+        else
-+            writemask = (1u << deref->var->data_type->dimx) - 1;
-+        if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE")
-+                || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3)))
-+        {
-+            /* These are always 1-component, but for some reason are written
-+             * with a writemask containing all components. */
-+            writemask = VKD3DSP_WRITEMASK_ALL;
-+        }
-+    }
-+    else
-+        VKD3D_ASSERT(reg.allocated);
-+    vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
-+    dst_param->write_mask = writemask;
-+    dst_param->reg.idx[0].offset = register_index;
-+    if (deref->rel_offset.node)
-+        hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir.");
-+static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
-+        struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref,
-+        unsigned int dst_writemask, const struct vkd3d_shader_location *loc)
-+    enum vkd3d_shader_register_type type = VKD3DSPR_TEMP;
-+    struct vkd3d_shader_version version;
-+    uint32_t register_index;
-+    unsigned int writemask;
-+    struct hlsl_reg reg;
-+    if (hlsl_type_is_resource(deref->var->data_type))
-+    {
-+        unsigned int sampler_offset;
-+        sampler_offset = hlsl_offset_from_deref_safe(ctx, deref);
-+        register_index = deref->var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset;
-+        writemask = VKD3DSP_WRITEMASK_ALL;
-+    }
-+    else if (deref->var->is_uniform)
-+    {
-+        type = VKD3DSPR_CONST;
-+        reg = hlsl_reg_from_deref(ctx, deref);
-+        register_index = reg.id;
-+        writemask = reg.writemask;
-+        VKD3D_ASSERT(reg.allocated);
-+    }
-+    else if (deref->var->is_input_semantic)
-+    {
-+        version.major = ctx->profile->major_version;
-+        version.minor = ctx->profile->minor_version;
-+        version.type = ctx->profile->type;
-+        if (sm1_register_from_semantic_name(&version, deref->var->semantic.name,
-+                deref->var->semantic.index, false, &type, &register_index))
-+        {
-+            writemask = (1 << deref->var->data_type->dimx) - 1;
-+        }
-+        else
-+        {
-+            type = VKD3DSPR_INPUT;
-+            reg = hlsl_reg_from_deref(ctx, deref);
-+            register_index = reg.id;
-+            writemask = reg.writemask;
-+            VKD3D_ASSERT(reg.allocated);
-+        }
-+    }
-+    else
-+    {
-+        type = VKD3DSPR_TEMP;
-+        reg = hlsl_reg_from_deref(ctx, deref);
-+        register_index = reg.id;
-+        writemask = reg.writemask;
-+    }
-+    vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1);
-+    src_param->reg.idx[0].offset = register_index;
-+    src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
-+    if (deref->rel_offset.node)
-+        hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir.");
-+static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct hlsl_ir_load *load)
-+    struct hlsl_ir_node *instr = &load->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_instruction *ins;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
-+        return;
-+    dst_param = &ins->dst[0];
-+    vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    dst_param->reg.idx[0].offset = instr->reg.id;
-+    dst_param->write_mask = instr->reg.writemask;
-+    sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask,
-+            &ins->location);
-+static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_resource_load *load)
-+    struct hlsl_ir_node *coords = load->coords.node;
-+    struct hlsl_ir_node *ddx = load->ddx.node;
-+    struct hlsl_ir_node *ddy = load->ddy.node;
-+    struct hlsl_ir_node *instr = &load->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    enum vkd3d_shader_opcode opcode;
-+    unsigned int src_count = 2;
-+    uint32_t flags = 0;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    switch (load->load_type)
-+    {
-+        case HLSL_RESOURCE_SAMPLE:
-+            opcode = VKD3DSIH_TEX;
-+            break;
-+            opcode = VKD3DSIH_TEX;
-+            flags |= VKD3DSI_TEXLD_PROJECT;
-+            break;
-+            opcode = VKD3DSIH_TEX;
-+            flags |= VKD3DSI_TEXLD_BIAS;
-+            break;
-+            opcode = VKD3DSIH_TEXLDD;
-+            src_count += 2;
-+            break;
-+        default:
-+            hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type);
-+            return;
-+    }
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
-+        return;
-+    ins->flags = flags;
-+    dst_param = &ins->dst[0];
-+    vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    dst_param->reg.idx[0].offset = instr->reg.id;
-+    dst_param->write_mask = instr->reg.writemask;
-+    src_param = &ins->src[0];
-+    vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL);
-+    sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource,
-+            VKD3DSP_WRITEMASK_ALL, &ins->location);
-+    if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
-+    {
-+        src_param = &ins->src[2];
-+        vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL);
-+        src_param = &ins->src[3];
-+        vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL);
-+    }
-+static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr)
-+    struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    uint32_t swizzle;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
-+        return;
-+    dst_param = &ins->dst[0];
-+    vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
-+    dst_param->reg.idx[0].offset = instr->reg.id;
-+    dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
-+    dst_param->write_mask = instr->reg.writemask;
-+    swizzle = hlsl_swizzle_from_writemask(val->reg.writemask);
-+    swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx);
-+    swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask);
-+    swizzle = vsir_swizzle_from_hlsl(swizzle);
-+    src_param = &ins->src[0];
-+    VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT);
-+    vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1);
-+    src_param->reg.idx[0].offset = val->reg.id;
-+    src_param->reg.dimension = VSIR_DIMENSION_VEC4;
-+    src_param->swizzle = swizzle;
-+static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        struct hlsl_ir_store *store)
-+    struct hlsl_ir_node *rhs = store->rhs.node;
-+    struct hlsl_ir_node *instr = &store->node;
-+    struct vkd3d_shader_instruction *ins;
-+    struct vkd3d_shader_src_param *src_param;
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
-+        return;
-+    sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask);
-+    src_param = &ins->src[0];
-+    vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask);
-+static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_jump *jump)
-+    struct hlsl_ir_node *condition = jump->condition.node;
-+    struct hlsl_ir_node *instr = &jump->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_instruction *ins;
-+    if (jump->type == HLSL_IR_JUMP_DISCARD_NEG)
-+    {
-+        if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0)))
-+            return;
-+        dst_param = &ins->dst[0];
-+        vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+        dst_param->reg.idx[0].offset = condition->reg.id;
-+        dst_param->write_mask = condition->reg.writemask;
-+    }
-+    else
-+    {
-+        hlsl_fixme(ctx, &instr->loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
-+    }
-+static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program);
-+static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff)
-+    struct hlsl_ir_node *condition = iff->condition.node;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct hlsl_ir_node *instr = &iff->node;
-+    struct vkd3d_shader_instruction *ins;
-+    if (hlsl_version_lt(ctx, 2, 1))
-+    {
-+        hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches.");
-+        return;
-+    }
-+    VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2)))
-+        return;
-+    ins->flags = VKD3D_SHADER_REL_OP_NE;
-+    src_param = &ins->src[0];
-+    vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL);
-+    src_param->modifiers = 0;
-+    src_param = &ins->src[1];
-+    vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL);
-+    src_param->modifiers = VKD3DSPSM_NEG;
-+    sm1_generate_vsir_block(ctx, &iff->then_block, program);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0)))
-+        return;
-+    sm1_generate_vsir_block(ctx, &iff->else_block, program);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0)))
-+        return;
-+static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
-+    struct hlsl_ir_node *instr, *next;
-+    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
-+    {
-+        if (instr->data_type)
-+        {
-+            if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
-+            {
-+                hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
-+                break;
-+            }
-+        }
-+        switch (instr->type)
-+        {
-+            case HLSL_IR_CALL:
-+                vkd3d_unreachable();
-+            case HLSL_IR_CONSTANT:
-+                sm1_generate_vsir_instr_constant(ctx, program, hlsl_ir_constant(instr));
-+                break;
-+            case HLSL_IR_EXPR:
-+                sm1_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr));
-+                break;
-+            case HLSL_IR_IF:
-+                sm1_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr));
-+                break;
-+            case HLSL_IR_JUMP:
-+                sm1_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr));
-+                break;
-+            case HLSL_IR_LOAD:
-+                sm1_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr));
-+                break;
-+            case HLSL_IR_RESOURCE_LOAD:
-+                sm1_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr));
-+                break;
-+            case HLSL_IR_STORE:
-+                sm1_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr));
-+                break;
-+            case HLSL_IR_SWIZZLE:
-+                generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
-+                break;
-+            default:
-+                hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
-+                break;
-+        }
-+    }
-+static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
-+        uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab)
-+    struct vkd3d_shader_version version = {0};
-+    struct vkd3d_bytecode_buffer buffer = {0};
-+    struct hlsl_block block;
-+    version.major = ctx->profile->major_version;
-+    version.minor = ctx->profile->minor_version;
-+    version.type = ctx->profile->type;
-+    if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+    write_sm1_uniforms(ctx, &buffer);
-+    if (buffer.status)
-+    {
-+        vkd3d_free(buffer.data);
-+        ctx->result = buffer.status;
-+        return;
-+    }
-+    ctab->code = buffer.data;
-+    ctab->size = buffer.size;
-+    generate_vsir_signature(ctx, program, entry_func);
-+    hlsl_block_init(&block);
-+    sm1_generate_vsir_constant_defs(ctx, program, &block);
-+    sm1_generate_vsir_sampler_dcls(ctx, program, &block);
-+    list_move_head(&entry_func->body.instrs, &block.instrs);
-+    sm1_generate_vsir_block(ctx, &entry_func->body, program);
-+static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block)
-+    struct vkd3d_shader_location *loc;
-+    struct hlsl_ir_node *vsir_instr;
-+    loc = &program->instructions.elements[program->instructions.count - 1].location;
-+    if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc)))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+    hlsl_block_add_instr(block, vsir_instr);
-+static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_node *instr)
-+    struct vkd3d_shader_location *loc;
-+    struct hlsl_ir_node *vsir_instr;
-+    loc = &program->instructions.elements[program->instructions.count - 1].location;
-+    if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx,
-+            program->instructions.count - 1, instr->data_type, &instr->reg, loc)))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+    list_add_before(&instr->entry, &vsir_instr->entry);
-+    hlsl_replace_node(instr, vsir_instr);
-+static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block,
-+        const struct vkd3d_shader_location *loc)
-+    const struct vkd3d_shader_version *version = &program->shader_version;
-+    const bool output = var->is_output_semantic;
-+    enum vkd3d_shader_sysval_semantic semantic;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_instruction *ins;
-+    enum vkd3d_shader_register_type type;
-+    enum vkd3d_shader_opcode opcode;
-+    unsigned int idx = 0;
-+    uint32_t write_mask;
-+    bool has_idx;
-+    sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping,
-+            ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
-+    if (semantic == ~0u)
-+        semantic = VKD3D_SHADER_SV_NONE;
-+    if (var->is_input_semantic)
-+    {
-+        switch (semantic)
-+        {
-+            case VKD3D_SHADER_SV_NONE:
-+                opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
-+                        ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT;
-+                break;
-+            case VKD3D_SHADER_SV_INSTANCE_ID:
-+            case VKD3D_SHADER_SV_IS_FRONT_FACE:
-+            case VKD3D_SHADER_SV_PRIMITIVE_ID:
-+            case VKD3D_SHADER_SV_SAMPLE_INDEX:
-+            case VKD3D_SHADER_SV_VERTEX_ID:
-+                opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
-+                        ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV;
-+                break;
-+            default:
-+                opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
-+                        ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV;
-+                break;
-+        }
-+    }
-+    else
-+    {
-+        if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL)
-+            opcode = VKD3DSIH_DCL_OUTPUT;
-+        else
-+            opcode = VKD3DSIH_DCL_OUTPUT_SIV;
-+    }
-+    if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx))
-+    {
-+        if (has_idx)
-+            idx = var->semantic.index;
-+        write_mask = (1u << var->data_type->dimx) - 1;
-+    }
-+    else
-+    {
-+        if (output)
-+            type = VKD3DSPR_OUTPUT;
-+        else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
-+            type = VKD3DSPR_PATCHCONST;
-+        else
-+            type = VKD3DSPR_INPUT;
-+        has_idx = true;
-+        idx = var->regs[HLSL_REGSET_NUMERIC].id;
-+        write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
-+    }
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0)))
-+        return;
-+    if (opcode == VKD3DSIH_DCL_OUTPUT)
-+    {
-+        VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE
-+                || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT);
-+        dst_param = &ins->declaration.dst;
-+    }
-+    else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS)
-+    {
-+        VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE);
-+        dst_param = &ins->declaration.dst;
-+    }
-+    else
-+    {
-+        VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE);
-+        ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic,
-+                var->semantic.index);
-+        dst_param = &ins->declaration.register_semantic.reg;
-+    }
-+    if (has_idx)
-+    {
-+        vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
-+        dst_param->reg.idx[0].offset = idx;
-+    }
-+    else
-+    {
-+        vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0);
-+    }
-+    if (shader_sm4_is_scalar_register(&dst_param->reg))
-+        dst_param->reg.dimension = VSIR_DIMENSION_SCALAR;
-+    else
-+        dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
-+    dst_param->write_mask = write_mask;
-+    if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL)
-+        ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
-+    add_last_vsir_instr_to_block(ctx, program, block);
-+static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
-+    struct vkd3d_shader_instruction *ins;
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0)))
-+        return;
-+    ins->declaration.count = temp_count;
-+    add_last_vsir_instr_to_block(ctx, program, block);
-+static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_block *block, uint32_t idx,
-+        uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc)
-+    struct vkd3d_shader_instruction *ins;
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0)))
-+        return;
-+    ins->declaration.indexable_temp.register_idx = idx;
-+    ins->declaration.indexable_temp.register_size = size;
-+    ins->declaration.indexable_temp.alignment = 0;
-+    ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT;
-+    ins->declaration.indexable_temp.component_count = comp_count;
-+    ins->declaration.indexable_temp.has_function_scope = false;
-+    add_last_vsir_instr_to_block(ctx, program, block);
-+static bool type_is_float(const struct hlsl_type *type)
-+    return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF;
-+static bool type_is_integer(const struct hlsl_type *type)
-+    return type->e.numeric.type == HLSL_TYPE_BOOL
-+            || type->e.numeric.type == HLSL_TYPE_INT
-+            || type->e.numeric.type == HLSL_TYPE_UINT;
-+static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        const struct hlsl_ir_expr *expr, uint32_t bits)
-+    struct hlsl_ir_node *operand = expr->operands[0].node;
-+    const struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct hlsl_constant_value value = {0};
-+    struct vkd3d_shader_instruction *ins;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2)))
-+        return;
-+    dst_param = &ins->dst[0];
-+    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
-+    vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask);
-+    value.u[0].u = bits;
-+    vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0);
-+static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_expr *expr)
-+    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
-+    const struct hlsl_type *dst_type = expr->node.data_type;
-+    const struct hlsl_type *src_type = arg1->data_type;
-+    static const union
-+    {
-+        uint32_t u;
-+        float f;
-+    } one = { .f = 1.0 };
-+    /* Narrowing casts were already lowered. */
-+    VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
-+    switch (dst_type->e.numeric.type)
-+    {
-+        case HLSL_TYPE_HALF:
-+        case HLSL_TYPE_FLOAT:
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_HALF:
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                    sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u);
-+                    return true;
-+                case HLSL_TYPE_DOUBLE:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
-+                    return false;
-+                default:
-+                    vkd3d_unreachable();
-+            }
-+            break;
-+        case HLSL_TYPE_INT:
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_HALF:
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                    sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
-+                    return true;
-+                case HLSL_TYPE_DOUBLE:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
-+                    return false;
-+                default:
-+                    vkd3d_unreachable();
-+            }
-+            break;
-+        case HLSL_TYPE_UINT:
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_HALF:
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                    sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
-+                    return true;
-+                case HLSL_TYPE_DOUBLE:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
-+                    return false;
-+                default:
-+                    vkd3d_unreachable();
-+            }
-+            break;
-+        case HLSL_TYPE_DOUBLE:
-+            hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
-+            return false;
-+        case HLSL_TYPE_BOOL:
-+            /* Casts to bool should have already been lowered. */
-+        default:
-+            vkd3d_unreachable();
-+    }
-+static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program,
-+        enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx)
-+    struct vkd3d_shader_dst_param *dst_param, *null_param;
-+    const struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int i, src_count;
-+    VKD3D_ASSERT(instr->reg.allocated);
-+    for (i = 0; i < HLSL_MAX_OPERANDS; ++i)
-+    {
-+        if (expr->operands[i].node)
-+            src_count = i + 1;
-+    }
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count)))
-+        return;
-+    dst_param = &ins->dst[dst_idx];
-+    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
-+    null_param = &ins->dst[1 - dst_idx];
-+    vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0);
-+    null_param->reg.dimension = VSIR_DIMENSION_NONE;
-+    for (i = 0; i < src_count; ++i)
-+        vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask);
-+static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, const struct hlsl_ir_expr *expr)
-+    struct hlsl_ir_node *operand = expr->operands[0].node;
-+    const struct hlsl_ir_node *instr = &expr->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct hlsl_constant_value value = {0};
-+    struct vkd3d_shader_instruction *ins;
-+    VKD3D_ASSERT(type_is_float(expr->node.data_type));
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DIV, 1, 2)))
-         return;
--    if (output)
--        signature = &program->output_signature;
--    else
--        signature = &program->input_signature;
-+    dst_param = &ins->dst[0];
-+    vsir_dst_from_hlsl_node(dst_param, ctx, instr);
-+    value.u[0].f = 1.0f;
-+    value.u[1].f = 1.0f;
-+    value.u[2].f = 1.0f;
-+    value.u[3].f = 1.0f;
-+    vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value,
-+            VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask);
-+    vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask);
-+static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name)
-+    const struct hlsl_type *dst_type = expr->node.data_type;
-+    const struct hlsl_type *src_type = NULL;
-+    VKD3D_ASSERT(expr->node.reg.allocated);
-+    if (expr->operands[0].node)
-+        src_type = expr->operands[0].node->data_type;
-+    switch (expr->op)
-+    {
-+            sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr);
-+            return true;
-+        case HLSL_OP1_ABS:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
-+            return true;
-+        case HLSL_OP1_BIT_NOT:
-+            VKD3D_ASSERT(type_is_integer(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_CAST:
-+            return sm4_generate_vsir_instr_expr_cast(ctx, program, expr);
-+        case HLSL_OP1_CEIL:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_COS:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1);
-+            return true;
-+        case HLSL_OP1_DSX:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_DSX_COARSE:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_DSX_FINE:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_DSY:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_DSY_COARSE:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_DSY_FINE:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_EXP2:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_F16TOF32:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_F32TOF16:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT);
-+            VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_FLOOR:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_FRACT:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_LOG2:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_LOGIC_NOT:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_NEG:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP1_RCP:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    /* SM5 comes with a RCP opcode */
-+                    if (hlsl_version_ge(ctx, 5, 0))
-+                        generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true);
-+                    else
-+                        sm4_generate_vsir_rcp_using_div(ctx, program, expr);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP1_REINTERPRET:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_ROUND:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_RSQ:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_SAT:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
-+            return true;
-+        case HLSL_OP1_SIN:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0);
-+            return true;
-+        case HLSL_OP1_SQRT:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true);
-+            return true;
-+        case HLSL_OP1_TRUNC:
-+            VKD3D_ASSERT(type_is_float(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true);
-+            return true;
-+        case HLSL_OP2_ADD:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_BIT_AND:
-+            VKD3D_ASSERT(type_is_integer(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true);
-+            return true;
-+        case HLSL_OP2_BIT_OR:
-+            VKD3D_ASSERT(type_is_integer(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true);
-+            return true;
-+        case HLSL_OP2_BIT_XOR:
-+            VKD3D_ASSERT(type_is_integer(dst_type));
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true);
-+            return true;
-+        case HLSL_OP2_DIV:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_UINT:
-+                    sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_DOT:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    switch (expr->operands[0].node->data_type->dimx)
-+                    {
-+                        case 4:
-+                            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
-+                            return true;
-+                        case 3:
-+                            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
-+                            return true;
-+                        case 2:
-+                            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false);
-+                            return true;
-+                        case 1:
-+                        default:
-+                            vkd3d_unreachable();
-+                    }
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_EQUAL:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
-+                            debug_hlsl_type(ctx, src_type));
-+                    return false;
-+            }
-+        case HLSL_OP2_GEQUAL:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
-+                            debug_hlsl_type(ctx, src_type));
-+                    return false;
-+            }
-+        case HLSL_OP2_LESS:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
-+                            debug_hlsl_type(ctx, src_type));
-+                    return false;
-+            }
-+        case HLSL_OP2_LOGIC_AND:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true);
-+            return true;
-+        case HLSL_OP2_LOGIC_OR:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true);
-+            return true;
-+        case HLSL_OP2_LSHIFT:
-+            VKD3D_ASSERT(type_is_integer(dst_type));
-+            VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true);
-+            return true;
-+        case HLSL_OP3_MAD:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_MAX:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_MIN:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_MOD:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_UINT:
-+                    sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_MUL:
-+            switch (dst_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    /* Using IMUL instead of UMUL because we're taking the low
-+                     * bits, and the native compiler generates IMUL. */
-+                    sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name);
-+                    return false;
-+            }
-+        case HLSL_OP2_NEQUAL:
-+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+            switch (src_type->e.numeric.type)
-+            {
-+                case HLSL_TYPE_FLOAT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true);
-+                    return true;
-+                case HLSL_TYPE_BOOL:
-+                case HLSL_TYPE_INT:
-+                case HLSL_TYPE_UINT:
-+                    generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true);
-+                    return true;
-+                default:
-+                    hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
-+                            debug_hlsl_type(ctx, src_type));
-+                    return false;
-+            }
-+        case HLSL_OP2_RSHIFT:
-+            VKD3D_ASSERT(type_is_integer(dst_type));
-+            VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr,
-+                    dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true);
-+            return true;
-+        case HLSL_OP3_TERNARY:
-+            generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true);
-+            return true;
-+        default:
-+            hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
-+            return false;
-+    }
-+static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_store *store)
-+    struct hlsl_ir_node *instr = &store->node;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct vkd3d_shader_src_param *src_param;
-+    struct vkd3d_shader_instruction *ins;
-+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
-+        return false;
-+    dst_param = &ins->dst[0];
-+    if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
-+            dst_param, &store->lhs, &instr->loc, store->writemask))
-+        return false;
-+    src_param = &ins->src[0];
-+    vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask);
-+    return true;
-+/* Does this variable's data come directly from the API user, rather than
-+ * being temporary or from a previous shader stage? I.e. is it a uniform or
-+ * VS input? */
-+static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var)
-+    if (var->is_uniform)
-+        return true;
-+    return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX;
-+static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load)
-+    const struct vkd3d_shader_version *version = &program->shader_version;
-+    const struct hlsl_type *type = load->node.data_type;
-+    struct vkd3d_shader_dst_param *dst_param;
-+    struct hlsl_ir_node *instr = &load->node;
-+    struct vkd3d_shader_instruction *ins;
-+    struct hlsl_constant_value value;
-+    VKD3D_ASSERT(hlsl_is_numeric_type(type));
-+    if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var))
-+    {
-+        /* Uniform bools can be specified as anything, but internal bools
-+         * always have 0 for false and ~0 for true. Normalise that here. */
-+        if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3)))
-+            return false;
-+        dst_param = &ins->dst[0];
-+        vsir_dst_from_hlsl_node(dst_param, ctx, instr);
-+        if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
-+                &ins->src[0], &load->src, dst_param->write_mask, &instr->loc))
-+            return false;
-+        memset(&value, 0xff, sizeof(value));
-+        vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value,
-+                VKD3D_DATA_UINT, type->dimx, dst_param->write_mask);
-+        memset(&value, 0x00, sizeof(value));
-+        vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value,
-+                VKD3D_DATA_UINT, type->dimx, dst_param->write_mask);
-+    }
-+    else
-+    {
-+        if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
-+            return false;
-+        dst_param = &ins->dst[0];
-+        vsir_dst_from_hlsl_node(dst_param, ctx, instr);
-+        if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
-+                &ins->src[0], &load->src, dst_param->write_mask, &instr->loc))
-+            return false;
-+    }
-+    return true;
-+static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx,
-+        struct vsir_program *program, struct hlsl_ir_resource_store *store)
-+    struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource);
-+    struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node;
-+    struct hlsl_ir_node *instr = &store->node;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int writemask;
--    if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity,
--            signature->element_count + 1, sizeof(*signature->elements)))
-+    if (!store->resource.var->is_uniform)
-     {
--        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
--        return;
-+        hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable.");
-+        return false;
-     }
--    element = &signature->elements[signature->element_count++];
--    if (!hlsl_sm1_register_from_semantic(&program->shader_version,
--            var->semantic.name, var->semantic.index, output, &type, &register_index))
-+    if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
-     {
--        unsigned int usage_idx;
--        D3DDECLUSAGE usage;
--        bool ret;
-+        hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.");
-+        return false;
-+    }
--        register_index = var->regs[HLSL_REGSET_NUMERIC].id;
-+    if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-+    {
-+        if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2)))
-+            return false;
--        ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx);
--        VKD3D_ASSERT(ret);
--        /* With the exception of vertex POSITION output, none of these are
--         * system values. Pixel POSITION input is not equivalent to
--         * SV_Position; the closer equivalent is VPOS, which is not declared
--         * as a semantic. */
--        if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX
--                && output && usage == D3DDECLUSAGE_POSITION)
--            sysval = VKD3D_SHADER_SV_POSITION;
-+        writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx);
-+        if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
-+                &ins->dst[0], &store->resource, &instr->loc, writemask))
-+            return false;
-+    }
-+    else
-+    {
-+        if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2)))
-+            return false;
-+        if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
-+                &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL))
-+            return false;
-     }
--    mask = (1 << var->data_type->dimx) - 1;
--    memset(element, 0, sizeof(*element));
--    if (!(element->semantic_name = vkd3d_strdup(var->semantic.name)))
-+    vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
-+    vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL);
-+    return true;
-+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
-+    struct vkd3d_string_buffer *dst_type_string;
-+    struct hlsl_ir_node *instr, *next;
-+    struct hlsl_ir_switch_case *c;
-+    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
-     {
--        --signature->element_count;
--        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
--        return;
-+        if (instr->data_type)
-+        {
-+            if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
-+            {
-+                hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
-+                break;
-+            }
-+        }
-+        switch (instr->type)
-+        {
-+            case HLSL_IR_CALL:
-+                vkd3d_unreachable();
-+            case HLSL_IR_CONSTANT:
-+                /* In SM4 all constants are inlined. */
-+                break;
-+            case HLSL_IR_EXPR:
-+                if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type)))
-+                    break;
-+                if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer))
-+                    replace_instr_with_last_vsir_instr(ctx, program, instr);
-+                hlsl_release_string_buffer(ctx, dst_type_string);
-+                break;
-+            case HLSL_IR_IF:
-+                sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program);
-+                sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program);
-+                break;
-+            case HLSL_IR_LOAD:
-+                if (sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)))
-+                    replace_instr_with_last_vsir_instr(ctx, program, instr);
-+                break;
-+            case HLSL_IR_LOOP:
-+                sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program);
-+                break;
-+            case HLSL_IR_RESOURCE_STORE:
-+                if (sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)))
-+                    replace_instr_with_last_vsir_instr(ctx, program, instr);
-+                break;
-+            case HLSL_IR_STORE:
-+                if (sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)))
-+                    replace_instr_with_last_vsir_instr(ctx, program, instr);
-+                break;
-+            case HLSL_IR_SWITCH:
-+                LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry)
-+                    sm4_generate_vsir_block(ctx, &c->body, program);
-+                break;
-+            case HLSL_IR_SWIZZLE:
-+                generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
-+                replace_instr_with_last_vsir_instr(ctx, program, instr);
-+                break;
-+            default:
-+                break;
-+        }
-     }
--    element->semantic_index = var->semantic.index;
--    element->sysval_semantic = sysval;
--    element->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
--    element->register_index = register_index;
--    element->target_location = register_index;
--    element->register_count = 1;
--    element->mask = mask;
--    element->used_mask = mask;
--    if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output)
--        element->interpolation_mode = VKD3DSIM_LINEAR;
- }
--static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program)
-+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
-+        struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
- {
-+    bool is_patch_constant_func = func == ctx->patch_constant_func;
-+    struct hlsl_block block = {0};
-+    struct hlsl_scope *scope;
-     struct hlsl_ir_var *var;
-+    uint32_t temp_count;
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+    compute_liveness(ctx, func);
-+    mark_indexable_vars(ctx, func);
-+    temp_count = allocate_temp_registers(ctx, func);
-+    if (ctx->result)
-+        return;
-+    program->temp_count = max(program->temp_count, temp_count);
-+    hlsl_block_init(&block);
-+    LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
-     {
--        if (var->is_input_semantic)
--            sm1_generate_vsir_signature_entry(ctx, program, false, var);
--        if (var->is_output_semantic)
--            sm1_generate_vsir_signature_entry(ctx, program, true, var);
-+        if ((var->is_input_semantic && var->last_read)
-+                || (var->is_output_semantic && var->first_write))
-+            sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc);
-+    }
-+    if (temp_count)
-+        sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc);
-+    LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
-+    {
-+        LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
-+        {
-+            if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
-+                continue;
-+            if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
-+                continue;
-+            if (var->indexable)
-+            {
-+                unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
-+                unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
-+                sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc);
-+            }
-+        }
-     }
-+    list_move_head(&func->body.instrs, &block.instrs);
-+    hlsl_block_cleanup(&block);
-+    sm4_generate_vsir_block(ctx, &func->body, program);
- }
- /* OBJECTIVE: Translate all the information from ctx and entry_func to the
-- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile()
-- * without relying on ctx and entry_func. */
--static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
--        uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab)
-+ * vsir_program, so it can be used as input to tpf_compile() without relying
-+ * on ctx and entry_func. */
-+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
-+        uint64_t config_flags, struct vsir_program *program)
- {
-     struct vkd3d_shader_version version = {0};
--    struct vkd3d_bytecode_buffer buffer = {0};
-     version.major = ctx->profile->major_version;
-     version.minor = ctx->profile->minor_version;
-     version.type = ctx->profile->type;
--    if (!vsir_program_init(program, NULL, &version, 0))
-+    if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
-     {
-         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-         return;
-     }
--    write_sm1_uniforms(ctx, &buffer);
--    if (buffer.status)
-+    generate_vsir_signature(ctx, program, func);
-+    if (version.type == VKD3D_SHADER_TYPE_HULL)
-+        generate_vsir_signature(ctx, program, ctx->patch_constant_func);
-+    if (version.type == VKD3D_SHADER_TYPE_COMPUTE)
-     {
--        vkd3d_free(buffer.data);
--        ctx->result = buffer.status;
--        return;
-+        program->thread_group_size.x = ctx->thread_count[0];
-+        program->thread_group_size.y = ctx->thread_count[1];
-+        program->thread_group_size.z = ctx->thread_count[2];
-     }
--    ctab->code = buffer.data;
--    ctab->size = buffer.size;
--    sm1_generate_vsir_signature(ctx, program);
-+    sm4_generate_vsir_add_function(ctx, func, config_flags, program);
-+    if (version.type == VKD3D_SHADER_TYPE_HULL)
-+        sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program);
- }
- static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point,
-@@ -6337,16 +9331,95 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru
-     return true;
- }
--int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
--        enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
-+static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
-+    struct hlsl_ir_node *call, *rhs, *store;
-+    struct hlsl_ir_function_decl *func;
-+    unsigned int component_count;
-+    struct hlsl_ir_load *load;
-+    struct hlsl_ir_expr *expr;
-+    struct hlsl_ir_var *lhs;
-+    char *body;
-+    static const char template[] =
-+    "typedef uint%u uintX;\n"
-+    "uintX soft_f32tof16(float%u x)\n"
-+    "{\n"
-+    "    uintX v = asuint(x);\n"
-+    "    uintX v_abs = v & 0x7fffffff;\n"
-+    "    uintX sign_bit = (v >> 16) & 0x8000;\n"
-+    "    uintX exp = (v >> 23) & 0xff;\n"
-+    "    uintX mantissa = v & 0x7fffff;\n"
-+    "    uintX nan16;\n"
-+    "    uintX nan = (v & 0x7f800000) == 0x7f800000;\n"
-+    "    uintX val;\n"
-+    "\n"
-+    "    val = 113 - exp;\n"
-+    "    val = (mantissa + 0x800000) >> val;\n"
-+    "    val >>= 13;\n"
-+    "\n"
-+    "    val = (exp - 127) < -38 ? 0 : val;\n"
-+    "\n"
-+    "    val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n"
-+    "    val = v_abs > 0x47ffe000 ? 0x7bff : val;\n"
-+    "\n"
-+    "    nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n"
-+    "    val = nan ? nan16 : val;\n"
-+    "\n"
-+    "    return (val & 0x7fff) + sign_bit;\n"
-+    "}\n";
-+    if (node->type != HLSL_IR_EXPR)
-+        return false;
-+    expr = hlsl_ir_expr(node);
-+    if (expr->op != HLSL_OP1_F32TOF16)
-+        return false;
-+    rhs = expr->operands[0].node;
-+    component_count = hlsl_type_component_count(rhs->data_type);
-+    if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
-+        return false;
-+    if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body)))
-+        return false;
-+    lhs = func->parameters.vars[0];
-+    if (!(store = hlsl_new_simple_store(ctx, lhs, rhs)))
-+        return false;
-+    hlsl_block_add_instr(block, store);
-+    if (!(call = hlsl_new_call(ctx, func, &node->loc)))
-+        return false;
-+    hlsl_block_add_instr(block, call);
-+    if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc)))
-+        return false;
-+    hlsl_block_add_instr(block, &load->node);
-+    return true;
-+static void process_entry_function(struct hlsl_ctx *ctx,
-+        const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func)
- {
-     const struct hlsl_profile_info *profile = ctx->profile;
-+    struct hlsl_block static_initializers, global_uniforms;
-     struct hlsl_block *const body = &entry_func->body;
-     struct recursive_call_ctx recursive_call_ctx;
-     struct hlsl_ir_var *var;
-     unsigned int i;
--    list_move_head(&body->instrs, &ctx->static_initializers.instrs);
-+    if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers))
-+        return;
-+    list_move_head(&body->instrs, &static_initializers.instrs);
-+    if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block))
-+        return;
-+    list_move_head(&body->instrs, &global_uniforms.instrs);
-     memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx));
-     hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx);
-@@ -6355,10 +9428,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-     /* Avoid going into an infinite loop when processing call instructions.
-      * lower_return() recurses into inferior calls. */
-     if (ctx->result)
--        return ctx->result;
-+        return;
-     if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
-+    {
-         lower_ir(ctx, lower_f16tof32, body);
-+        lower_ir(ctx, lower_f32tof16, body);
-+    }
-     lower_return(ctx, entry_func, body, false);
-@@ -6367,20 +9443,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-     lower_ir(ctx, lower_matrix_swizzles, body);
-     lower_ir(ctx, lower_index_loads, body);
--    LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry)
--    {
--        if (var->storage_modifiers & HLSL_STORAGE_UNIFORM)
--            prepend_uniform_copy(ctx, body, var);
--    }
-     for (i = 0; i < entry_func->parameters.count; ++i)
-     {
-         var = entry_func->parameters.vars[i];
--        if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM))
-+        if (hlsl_type_is_resource(var->data_type))
-         {
-             prepend_uniform_copy(ctx, body, var);
-         }
-+        else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM))
-+        {
-+            if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func)
-+                hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
-+                        "Patch constant function parameter \"%s\" cannot be uniform.", var->name);
-+            else
-+                prepend_uniform_copy(ctx, body, var);
-+        }
-         else
-         {
-             if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT
-@@ -6392,9 +9470,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-             }
-             if (var->storage_modifiers & HLSL_STORAGE_IN)
--                prepend_input_var_copy(ctx, body, var);
-+                prepend_input_var_copy(ctx, entry_func, var);
-             if (var->storage_modifiers & HLSL_STORAGE_OUT)
--                append_output_var_copy(ctx, body, var);
-+                append_output_var_copy(ctx, entry_func, var);
-         }
-     }
-     if (entry_func->return_var)
-@@ -6403,28 +9481,17 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-             hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
-                     "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
--        append_output_var_copy(ctx, body, entry_func->return_var);
--    }
--    for (i = 0; i < entry_func->attr_count; ++i)
--    {
--        const struct hlsl_attribute *attr = entry_func->attrs[i];
--        if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE)
--            parse_numthreads_attribute(ctx, attr);
--        else
--            hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE,
--                    "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name);
-+        append_output_var_copy(ctx, entry_func, entry_func->return_var);
-     }
--    if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads)
--        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
--                "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name);
-     if (profile->major_version >= 4)
-     {
-         hlsl_transform_ir(ctx, lower_discard_neg, body, NULL);
-     }
-+    else
-+    {
-+        hlsl_transform_ir(ctx, lower_discard_nz, body, NULL);
-+    }
-     transform_unroll_loops(ctx, body);
-     hlsl_run_const_passes(ctx, body);
-@@ -6496,29 +9563,72 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-     while (hlsl_transform_ir(ctx, dce, body, NULL));
-     compute_liveness(ctx, entry_func);
-+    mark_vars_usage(ctx);
--    if (TRACE_ON())
--        rb_for_each_entry(&ctx->functions, dump_function, ctx);
-+    calculate_resource_register_counts(ctx);
--    transform_derefs(ctx, mark_indexable_vars, body);
-+    allocate_register_reservations(ctx, &ctx->extern_vars);
-+    allocate_register_reservations(ctx, &entry_func->extern_vars);
-+    allocate_semantic_registers(ctx, entry_func);
--    calculate_resource_register_counts(ctx);
-+int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
-+        enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
-+    const struct hlsl_profile_info *profile = ctx->profile;
-+    struct hlsl_block global_uniform_block;
-+    struct hlsl_ir_var *var;
-+    parse_entry_function_attributes(ctx, entry_func);
-+    if (ctx->result)
-+        return ctx->result;
-+    if (profile->type == VKD3D_SHADER_TYPE_HULL)
-+        validate_hull_shader_attributes(ctx, entry_func);
-+    else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads)
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name);
-+    else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID)
-+        hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
-+                "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name);
-+    hlsl_block_init(&global_uniform_block);
-+    LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry)
-+    {
-+        if (var->storage_modifiers & HLSL_STORAGE_UNIFORM)
-+            prepend_uniform_copy(ctx, &global_uniform_block, var);
-+    }
-+    process_entry_function(ctx, &global_uniform_block, entry_func);
-+    if (ctx->result)
-+        return ctx->result;
--    allocate_register_reservations(ctx);
-+    if (profile->type == VKD3D_SHADER_TYPE_HULL)
-+    {
-+        process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func);
-+        if (ctx->result)
-+            return ctx->result;
-+    }
-+    hlsl_block_cleanup(&global_uniform_block);
--    allocate_temp_registers(ctx, entry_func);
-     if (profile->major_version < 4)
-     {
-+        mark_indexable_vars(ctx, entry_func);
-+        allocate_temp_registers(ctx, entry_func);
-         allocate_const_registers(ctx, entry_func);
-     }
-     else
-     {
-         allocate_buffers(ctx);
--        allocate_objects(ctx, HLSL_REGSET_TEXTURES);
--        allocate_objects(ctx, HLSL_REGSET_UAVS);
-+        allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES);
-+        allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS);
-     }
--    allocate_semantic_registers(ctx);
--    allocate_objects(ctx, HLSL_REGSET_SAMPLERS);
-+    allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
-+    if (TRACE_ON())
-+        rb_for_each_entry(&ctx->functions, dump_function, ctx);
-     if (ctx->result)
-         return ctx->result;
-@@ -6540,14 +9650,29 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-                 return ctx->result;
-             }
--            result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func);
-+            result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context);
-             vsir_program_cleanup(&program);
-             vkd3d_shader_free_shader_code(&ctab);
-             return result;
-         }
--            return hlsl_sm4_write(ctx, entry_func, out);
-+        {
-+            uint32_t config_flags = vkd3d_shader_init_config_flags();
-+            struct vsir_program program;
-+            int result;
-+            sm4_generate_vsir(ctx, entry_func, config_flags, &program);
-+            if (ctx->result)
-+            {
-+                vsir_program_cleanup(&program);
-+                return ctx->result;
-+            }
-+            result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func);
-+            vsir_program_cleanup(&program);
-+            return result;
-+        }
-         default:
-             ERR("Unsupported shader target type %#x.\n", target_type);
-diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
-index db4913b7c62..716adb15f08 100644
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
-@@ -1452,11 +1452,15 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg)
-             case HLSL_TYPE_UINT:
-             case HLSL_TYPE_INT:
--            case HLSL_TYPE_BOOL:
-                 if (const_arg->value.u[k].u != 1)
-                     return false;
-                 break;
-+            case HLSL_TYPE_BOOL:
-+                if (const_arg->value.u[k].u != ~0)
-+                    return false;
-+                break;
-             default:
-                 return false;
-         }
-@@ -1514,6 +1518,20 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
-                 res_node = mut_arg;
-             break;
-+        case HLSL_OP2_LOGIC_AND:
-+            if (constant_is_zero(const_arg))
-+                res_node = &const_arg->node;
-+            else if (constant_is_one(const_arg))
-+                res_node = mut_arg;
-+            break;
-+        case HLSL_OP2_LOGIC_OR:
-+            if (constant_is_zero(const_arg))
-+                res_node = mut_arg;
-+            else if (constant_is_one(const_arg))
-+                res_node = &const_arg->node;
-+            break;
-         default:
-             break;
-     }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
-index 747238e2fee..56c98d30661 100644
---- a/libs/vkd3d/libs/vkd3d-shader/ir.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
-@@ -19,6 +19,15 @@
- #include "vkd3d_shader_private.h"
- #include "vkd3d_types.h"
-+struct vsir_transformation_context
-+    enum vkd3d_result result;
-+    struct vsir_program *program;
-+    uint64_t config_flags;
-+    const struct vkd3d_shader_compile_info *compile_info;
-+    struct vkd3d_shader_message_context *message_context;
- static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info,
-         unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters)
- {
-@@ -65,7 +74,8 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil
- }
- bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info,
--        const struct vkd3d_shader_version *version, unsigned int reserve)
-+        const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type,
-+        enum vsir_normalisation_level normalisation_level)
- {
-     memset(program, 0, sizeof(*program));
-@@ -87,6 +97,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c
-     }
-     program->shader_version = *version;
-+    program->cf_type = cf_type;
-+    program->normalisation_level = normalisation_level;
-     return shader_instruction_array_init(&program->instructions, reserve);
- }
-@@ -117,26 +129,204 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter(
-     return NULL;
- }
-+static struct signature_element *vsir_signature_find_element_by_name(
-+        const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index)
-+    for (unsigned int i = 0; i < signature->element_count; ++i)
-+    {
-+        if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name)
-+                && signature->elements[i].semantic_index == semantic_index)
-+            return &signature->elements[i];
-+    }
-+    return NULL;
-+bool vsir_signature_find_sysval(const struct shader_signature *signature,
-+        enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index)
-+    const struct signature_element *e;
-+    unsigned int i;
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->sysval_semantic == sysval && e->semantic_index == semantic_index)
-+        {
-+            *element_index = i;
-+            return true;
-+        }
-+    }
-+    return false;
-+void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type,
-+        enum vkd3d_data_type data_type, unsigned int idx_count)
-+    reg->type = reg_type;
-+    reg->non_uniform = false;
-+    reg->data_type = data_type;
-+    reg->idx[0].offset = ~0u;
-+    reg->idx[0].rel_addr = NULL;
-+    reg->idx[0].is_in_bounds = false;
-+    reg->idx[1].offset = ~0u;
-+    reg->idx[1].rel_addr = NULL;
-+    reg->idx[1].is_in_bounds = false;
-+    reg->idx[2].offset = ~0u;
-+    reg->idx[2].rel_addr = NULL;
-+    reg->idx[2].is_in_bounds = false;
-+    reg->idx_count = idx_count;
-+    reg->dimension = VSIR_DIMENSION_SCALAR;
-+    reg->alignment = 0;
- static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg)
- {
-     return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID;
- }
--static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction)
-+void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type,
-+        enum vkd3d_data_type data_type, unsigned int idx_count)
- {
--    enum vkd3d_shader_opcode opcode = instruction->opcode;
--    return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT)
--            || opcode == VKD3DSIH_HS_DECLS;
-+    vsir_register_init(&param->reg, reg_type, data_type, idx_count);
-+    param->swizzle = 0;
-+    param->modifiers = VKD3DSPSM_NONE;
- }
--static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins)
-+static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value)
- {
--    struct vkd3d_shader_location location = ins->location;
-+    vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
-+    src->reg.u.immconst_u32[0] = value;
--    vsir_instruction_init(ins, &location, VKD3DSIH_NOP);
-+void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id)
-+    vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1);
-+    param->reg.dimension = VSIR_DIMENSION_NONE;
-+    param->reg.idx[0].offset = label_id;
-+static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type)
-+    vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1);
-+    src->reg.idx[0].offset = idx;
-+static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2);
-+    src->reg.idx[0].offset = id;
-+    src->reg.idx[1].offset = idx;
-+    src->reg.dimension = VSIR_DIMENSION_VEC4;
-+    src->swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+static void vsir_src_param_init_sampler(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 2);
-+    src->reg.idx[0].offset = id;
-+    src->reg.idx[1].offset = idx;
-+    src->reg.dimension = VSIR_DIMENSION_NONE;
-+static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
-+    src->reg.idx[0].offset = idx;
-+static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1);
-+    src->reg.idx[0].offset = idx;
-+static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
-+    src->reg.idx[0].offset = idx;
-+static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    src->reg.idx[0].offset = idx;
-+static void src_param_init_temp_float4(struct vkd3d_shader_src_param *src, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    src->reg.dimension = VSIR_DIMENSION_VEC4;
-+    src->swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+    src->reg.idx[0].offset = idx;
-+static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx)
-+    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
-+    src->reg.idx[0].offset = idx;
-+void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type,
-+        enum vkd3d_data_type data_type, unsigned int idx_count)
-+    vsir_register_init(&param->reg, reg_type, data_type, idx_count);
-+    param->write_mask = VKD3DSP_WRITEMASK_0;
-+    param->modifiers = VKD3DSPDM_NONE;
-+    param->shift = 0;
-+static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+    vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
-+    dst->reg.idx[0].offset = idx;
-+static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+    vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1);
-+    dst->reg.idx[0].offset = idx;
-+static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+    vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
-+    dst->reg.idx[0].offset = idx;
-+static void dst_param_init_temp_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+    vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
-+    dst->reg.idx[0].offset = idx;
-+    dst->reg.dimension = VSIR_DIMENSION_VEC4;
-+static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+    vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
-+    dst->reg.idx[0].offset = idx;
-+static void dst_param_init_output(struct vkd3d_shader_dst_param *dst,
-+        enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask)
-+    vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1);
-+    dst->reg.idx[0].offset = idx;
-+    dst->reg.dimension = VSIR_DIMENSION_VEC4;
-+    dst->write_mask = write_mask;
-+void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
-+        enum vkd3d_shader_opcode opcode)
-+    memset(ins, 0, sizeof(*ins));
-+    ins->location = *location;
-+    ins->opcode = opcode;
- }
--static bool vsir_instruction_init_with_params(struct vsir_program *program,
-+bool vsir_instruction_init_with_params(struct vsir_program *program,
-         struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
-         enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count)
- {
-@@ -161,6 +351,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program,
-     return true;
- }
-+static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins,
-+        const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program)
-+    struct vkd3d_shader_src_param *src_param;
-+    if (!(src_param = vsir_program_get_src_params(program, 1)))
-+        return false;
-+    vsir_src_param_init_label(src_param, label_id);
-+    vsir_instruction_init(ins, location, VKD3DSIH_LABEL);
-+    ins->src = src_param;
-+    ins->src_count = 1;
-+    return true;
-+static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction)
-+    enum vkd3d_shader_opcode opcode = instruction->opcode;
-+    return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT)
-+            || opcode == VKD3DSIH_HS_DECLS;
-+static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins)
-+    struct vkd3d_shader_location location = ins->location;
-+    vsir_instruction_init(ins, &location, VKD3DSIH_NOP);
- static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type,
-         enum vkd3d_shader_opcode *opcode, bool *requires_swap)
- {
-@@ -441,10 +662,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog
-     return VKD3D_OK;
- }
-+static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex)
-+    unsigned int idx = tex->src[1].reg.idx[0].offset;
-+    struct vkd3d_shader_src_param *srcs;
-+    VKD3D_ASSERT(tex->src[1].reg.idx_count == 1);
-+    VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr);
-+    if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3)))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    srcs[0] = tex->src[0];
-+    vsir_src_param_init_resource(&srcs[1], idx, idx);
-+    vsir_src_param_init_sampler(&srcs[2], idx, idx);
-+    tex->opcode = VKD3DSIH_SAMPLE;
-+    tex->src = srcs;
-+    tex->src_count = 3;
-+    return VKD3D_OK;
-+static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program,
-+        struct vkd3d_shader_instruction *texldd)
-+    unsigned int idx = texldd->src[1].reg.idx[0].offset;
-+    struct vkd3d_shader_src_param *srcs;
-+    VKD3D_ASSERT(texldd->src[1].reg.idx_count == 1);
-+    VKD3D_ASSERT(!texldd->src[1].reg.idx[0].rel_addr);
-+    if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5)))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    srcs[0] = texldd->src[0];
-+    vsir_src_param_init_resource(&srcs[1], idx, idx);
-+    vsir_src_param_init_sampler(&srcs[2], idx, idx);
-+    srcs[3] = texldd->src[2];
-+    srcs[4] = texldd->src[3];
-+    texldd->opcode = VKD3DSIH_SAMPLE_GRAD;
-+    texldd->src = srcs;
-+    texldd->src_count = 5;
-+    return VKD3D_OK;
- static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
-+        struct vsir_transformation_context *ctx)
- {
-     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    struct vkd3d_shader_message_context *message_context = ctx->message_context;
-     unsigned int tmp_idx = ~0u, i;
-     enum vkd3d_result ret;
-@@ -471,8 +740,12 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr
-             case VKD3DSIH_DCL:
-             case VKD3DSIH_DCL_CONSTANT_BUFFER:
-+            case VKD3DSIH_DCL_GLOBAL_FLAGS:
-             case VKD3DSIH_DCL_SAMPLER:
-             case VKD3DSIH_DCL_TEMPS:
-+            case VKD3DSIH_DCL_THREAD_GROUP:
-+            case VKD3DSIH_DCL_UAV_TYPED:
-                 vkd3d_shader_instruction_make_nop(ins);
-                 break;
-@@ -481,6 +754,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr
-                     return ret;
-                 break;
-+            case VKD3DSIH_TEX:
-+                if ((ret = vsir_program_lower_tex(program, ins)) < 0)
-+                    return ret;
-+                break;
-+            case VKD3DSIH_TEXLDD:
-+                if ((ret = vsir_program_lower_texldd(program, ins)) < 0)
-+                    return ret;
-+                break;
-+            case VKD3DSIH_TEXBEM:
-+            case VKD3DSIH_TEXBEML:
-+            case VKD3DSIH_TEXCOORD:
-+            case VKD3DSIH_TEXDEPTH:
-+            case VKD3DSIH_TEXDP3:
-+            case VKD3DSIH_TEXDP3TEX:
-+            case VKD3DSIH_TEXLDL:
-+            case VKD3DSIH_TEXM3x2PAD:
-+            case VKD3DSIH_TEXM3x2TEX:
-+            case VKD3DSIH_TEXM3x3DIFF:
-+            case VKD3DSIH_TEXM3x3PAD:
-+            case VKD3DSIH_TEXM3x3SPEC:
-+            case VKD3DSIH_TEXM3x3TEX:
-+            case VKD3DSIH_TEXM3x3VSPEC:
-+            case VKD3DSIH_TEXREG2AR:
-+            case VKD3DSIH_TEXREG2GB:
-+            case VKD3DSIH_TEXREG2RGB:
-+                vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
-+                        "Aborting due to unimplemented feature: Combined sampler instruction %#x.",
-+                        ins->opcode);
-+                return VKD3D_ERROR_NOT_IMPLEMENTED;
-             default:
-                 break;
-         }
-@@ -523,29 +828,197 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i
-     }
-     for (i = 0; i < ins->dst_count; ++i)
--        shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id);
-+        shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id);
- }
--static const struct vkd3d_shader_varying_map *find_varying_map(
--        const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx)
-+/* Ensure that the program closes with a ret. sm1 programs do not, by default.
-+ * Many of our IR passes rely on this in order to insert instructions at the
-+ * end of execution. */
-+static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
--    unsigned int i;
--    for (i = 0; i < varying_map->varying_count; ++i)
--    {
--        if (varying_map->varying_map[i].output_signature_index == signature_idx)
--            return &varying_map->varying_map[i];
--    }
-+    static const struct vkd3d_shader_location no_loc;
-+    if (program->instructions.count
-+            && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET)
-+        return VKD3D_OK;
--    return NULL;
-+    if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET);
-+    return VKD3D_OK;
- }
--static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program,
--        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
-+static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
-+    struct shader_signature *signature = &program->output_signature;
-+    struct signature_element *new_elements, *e;
-+    if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX)
-+        return VKD3D_OK;
-+    if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0)))
-+    {
-+        program->diffuse_written_mask = e->mask;
-+        e->mask = VKD3DSP_WRITEMASK_ALL;
-+        return VKD3D_OK;
-+    }
-+    if (!(new_elements = vkd3d_realloc(signature->elements,
-+            (signature->element_count + 1) * sizeof(*signature->elements))))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    signature->elements = new_elements;
-+    e = &signature->elements[signature->element_count++];
-+    memset(e, 0, sizeof(*e));
-+    e->semantic_name = vkd3d_strdup("COLOR");
-+    e->sysval_semantic = VKD3D_SHADER_SV_NONE;
-+    e->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+    e->register_count = 1;
-+    e->mask = VKD3DSP_WRITEMASK_ALL;
-+    e->used_mask = VKD3DSP_WRITEMASK_ALL;
-+    e->register_index = SM1_COLOR_REGISTER_OFFSET;
-+    e->target_location = SM1_COLOR_REGISTER_OFFSET;
-+    e->interpolation_mode = VKD3DSIM_NONE;
-+    return VKD3D_OK;
-+/* Uninitialized components of diffuse yield 1.0 in SM1-2. Implement this by
-+ * always writing diffuse in those versions, even if the PS doesn't read it. */
-+static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
-+    static const struct vkd3d_shader_location no_loc;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int i;
-+    if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX
-+            || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL)
-+        return VKD3D_OK;
-+    /* Write the instruction after all LABEL, DCL, and NOP instructions.
-+     * We need to skip NOP instructions because they might result from removed
-+     * DCLs, and there could still be DCLs after NOPs. */
-+    for (i = 0; i < program->instructions.count; ++i)
-+    {
-+        ins = &program->instructions.elements[i];
-+        if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP)
-+            break;
-+    }
-+    if (!shader_instruction_array_insert_at(&program->instructions, i, 1))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    ins = &program->instructions.elements[i];
-+    vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
-+    vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1);
-+    ins->dst[0].reg.idx[0].offset = 0;
-+    ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+    ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask;
-+    vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
-+    ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+    for (i = 0; i < 4; ++i)
-+        ins->src[0].reg.u.immconst_f32[i] = 1.0f;
-+    return VKD3D_OK;
-+static const struct vkd3d_shader_varying_map *find_varying_map(
-+        const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx)
-+    unsigned int i;
-+    for (i = 0; i < varying_map->varying_count; ++i)
-+    {
-+        if (varying_map->varying_map[i].output_signature_index == signature_idx)
-+            return &varying_map->varying_map[i];
-+    }
-+    return NULL;
-+static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info)
-+    const struct vkd3d_shader_spirv_target_info *spirv_info;
-+    enum vkd3d_shader_spirv_environment environment;
-+    switch (info->target_type)
-+    {
-+            spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO);
-+            environment = spirv_info ? spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0;
-+            switch (environment)
-+            {
-+                case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5:
-+                    return true;
-+                case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0:
-+                case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1:
-+                    /* FIXME: Allow KHR_maintenance4. */
-+                    return false;
-+                default:
-+                    FIXME("Unrecognized environment %#x.\n", environment);
-+                    return false;
-+            }
-+        default:
-+            return true;
-+    }
-+static void remove_unread_output_components(const struct shader_signature *signature,
-+        struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst)
-+    const struct signature_element *e;
-+    switch (dst->reg.type)
-+    {
-+        case VKD3DSPR_OUTPUT:
-+            e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0);
-+            break;
-+        case VKD3DSPR_ATTROUT:
-+            e = vsir_signature_find_element_for_reg(signature,
-+                    SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0);
-+            break;
-+        case VKD3DSPR_RASTOUT:
-+            e = vsir_signature_find_element_for_reg(signature,
-+                    SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0);
-+            break;
-+        default:
-+            return;
-+    }
-+    /* We already changed the mask earlier. */
-+    dst->write_mask &= e->mask;
-+    if (!dst->write_mask)
-+    {
-+        if (ins->dst_count == 1)
-+            vkd3d_shader_instruction_make_nop(ins);
-+        else
-+            vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0);
-+    }
-+static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
--    const struct vkd3d_shader_location location = {.source_name = compile_info->source_name};
-+    const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name};
-+    struct vkd3d_shader_message_context *message_context = ctx->message_context;
-+    const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info;
-+    bool allows_subset_masks = target_allows_subset_masks(compile_info);
-     struct shader_signature *signature = &program->output_signature;
-+    unsigned int orig_element_count = signature->element_count;
-     const struct vkd3d_shader_varying_map_info *varying_map;
-+    struct signature_element *new_elements, *e;
-+    unsigned int uninit_varying_count = 0;
-+    unsigned int subset_varying_count = 0;
-+    unsigned int new_register_count = 0;
-     unsigned int i;
-     if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO)))
-@@ -554,22 +1027,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
-     for (i = 0; i < signature->element_count; ++i)
-     {
-         const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i);
--        struct signature_element *e = &signature->elements[i];
-+        e = &signature->elements[i];
-         if (map)
-         {
-             unsigned int input_mask = map->input_mask;
-             e->target_location = map->input_register_index;
--            /* It is illegal in Vulkan if the next shader uses the same varying
--             * location with a different mask. */
--            if (input_mask && input_mask != e->mask)
-+            if ((input_mask & e->mask) == input_mask)
-+            {
-+                ++subset_varying_count;
-+                if (!allows_subset_masks)
-+                {
-+                    e->mask = input_mask;
-+                    e->used_mask &= input_mask;
-+                }
-+            }
-+            else if (input_mask && input_mask != e->mask)
-             {
-                 vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
-                         "Aborting due to not yet implemented feature: "
--                        "Output mask %#x does not match input mask %#x.",
--                        e->mask, input_mask);
-+                        "Input mask %#x reads components not written in output mask %#x.",
-+                        input_mask, e->mask);
-                 return VKD3D_ERROR_NOT_IMPLEMENTED;
-             }
-         }
-@@ -577,17 +1057,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
-         {
-             e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED;
-         }
-+        new_register_count = max(new_register_count, e->register_index + 1);
-     }
-+    /* Handle uninitialized varyings by writing them before every ret.
-+     *
-+     * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized
-+     * varyings contain.
-+     *
-+     * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2.
-+     *   In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1).
-+     *
-+     * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1).
-+     *   WARP writes (1, 1, 1, 1).
-+     *
-+     * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1).
-+     *
-+     * We don't have enough knowledge to identify diffuse here. Instead we deal
-+     * with that in vsir_program_ensure_diffuse(), by always writing diffuse if
-+     * the shader doesn't.
-+     */
-     for (i = 0; i < varying_map->varying_count; ++i)
-     {
-         if (varying_map->varying_map[i].output_signature_index >= signature->element_count)
-+            ++uninit_varying_count;
-+    }
-+    if (!(new_elements = vkd3d_realloc(signature->elements,
-+            (signature->element_count + uninit_varying_count) * sizeof(*signature->elements))))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    signature->elements = new_elements;
-+    for (i = 0; i < varying_map->varying_count; ++i)
-+    {
-+        const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i];
-+        if (map->output_signature_index < orig_element_count)
-+            continue;
-+        TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n",
-+                map->input_register_index, map->input_mask);
-+        e = &signature->elements[signature->element_count++];
-+        memset(e, 0, sizeof(*e));
-+        e->sysval_semantic = VKD3D_SHADER_SV_NONE;
-+        e->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+        e->register_count = 1;
-+        e->mask = map->input_mask;
-+        e->used_mask = map->input_mask;
-+        e->register_index = new_register_count++;
-+        e->target_location = map->input_register_index;
-+        e->interpolation_mode = VKD3DSIM_LINEAR;
-+    }
-+    /* Write each uninitialized varying before each ret. */
-+    for (i = 0; i < program->instructions.count; ++i)
-+    {
-+        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-+        struct vkd3d_shader_location loc;
-+        if (ins->opcode != VKD3DSIH_RET)
-+            continue;
-+        loc = ins->location;
-+        if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count))
-+            return VKD3D_ERROR_OUT_OF_MEMORY;
-+        ins = &program->instructions.elements[i];
-+        for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j)
-         {
--            vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
--                    "Aborting due to not yet implemented feature: "
--                    "The next stage consumes varyings not written by this stage.");
--            return VKD3D_ERROR_NOT_IMPLEMENTED;
-+            e = &signature->elements[j];
-+            vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
-+            dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask);
-+            vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
-+            ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+            ++ins;
-         }
-+        i += uninit_varying_count;
-+    }
-+    /* Vulkan (without KHR_maintenance4) disallows any mismatching masks,
-+     * including when the input mask is a proper subset of the output mask.
-+     * Resolve this by rewriting the shader to remove unread components from
-+     * any writes to the output variable. */
-+    if (!subset_varying_count || allows_subset_masks)
-+        return VKD3D_OK;
-+    for (i = 0; i < program->instructions.count; ++i)
-+    {
-+        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-+        for (unsigned int j = 0; j < ins->dst_count; ++j)
-+            remove_unread_output_components(signature, ins, &ins->dst[j]);
-     }
-     return VKD3D_OK;
-@@ -727,192 +1293,68 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali
-     return VKD3D_OK;
- }
--void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type,
--        enum vkd3d_data_type data_type, unsigned int idx_count)
-+static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
--    reg->type = reg_type;
--    reg->non_uniform = false;
--    reg->data_type = data_type;
--    reg->idx[0].offset = ~0u;
--    reg->idx[0].rel_addr = NULL;
--    reg->idx[0].is_in_bounds = false;
--    reg->idx[1].offset = ~0u;
--    reg->idx[1].rel_addr = NULL;
--    reg->idx[1].is_in_bounds = false;
--    reg->idx[2].offset = ~0u;
--    reg->idx[2].rel_addr = NULL;
--    reg->idx[2].is_in_bounds = false;
--    reg->idx_count = idx_count;
--    reg->dimension = VSIR_DIMENSION_SCALAR;
--    reg->alignment = 0;
-+    struct hull_flattener flattener = {program->instructions};
-+    struct vkd3d_shader_instruction_array *instructions;
-+    struct shader_phase_location_array locations;
-+    enum vkd3d_result result = VKD3D_OK;
-+    unsigned int i;
--void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type,
--        enum vkd3d_data_type data_type, unsigned int idx_count)
--    vsir_register_init(&param->reg, reg_type, data_type, idx_count);
--    param->swizzle = 0;
--    param->modifiers = VKD3DSPSM_NONE;
-+    instructions = &flattener.instructions;
--void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type,
--        enum vkd3d_data_type data_type, unsigned int idx_count)
--    vsir_register_init(&param->reg, reg_type, data_type, idx_count);
--    param->write_mask = VKD3DSP_WRITEMASK_0;
--    param->modifiers = VKD3DSPDM_NONE;
--    param->shift = 0;
-+    flattener.phase = VKD3DSIH_INVALID;
-+    for (i = 0, locations.count = 0; i < instructions->count; ++i)
-+        flattener_eliminate_phase_related_dcls(&flattener, i, &locations);
--void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id)
--    vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1);
--    param->reg.dimension = VSIR_DIMENSION_NONE;
--    param->reg.idx[0].offset = label_id;
-+    if ((result = flattener_flatten_phases(&flattener, &locations)) < 0)
-+        return result;
--static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
--    vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
--    src->reg.idx[0].offset = idx;
-+    if (flattener.phase != VKD3DSIH_INVALID)
-+    {
-+        if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1))
-+            return VKD3D_ERROR_OUT_OF_MEMORY;
-+        vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET);
-+    }
--static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
--    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
--    src->reg.idx[0].offset = idx;
-+    program->instructions = flattener.instructions;
-+    return result;
- }
--static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+struct control_point_normaliser
- {
--    vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
--    dst->reg.idx[0].offset = idx;
-+    struct vkd3d_shader_instruction_array instructions;
-+    enum vkd3d_shader_opcode phase;
-+    struct vkd3d_shader_src_param *outpointid_param;
--static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser)
- {
--    vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
--    dst->reg.idx[0].offset = idx;
-+    return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
- }
--static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx)
-+struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program)
- {
--    vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
--    dst->reg.idx[0].offset = idx;
--    dst->write_mask = VKD3DSP_WRITEMASK_0;
-+    struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    struct vkd3d_shader_src_param *rel_addr;
--static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx)
--    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
--    src->reg.idx[0].offset = idx;
-+    if (instructions->outpointid_param)
-+        return instructions->outpointid_param;
--static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx)
--    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
--    src->reg.idx[0].offset = idx;
-+    if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1)))
-+        return NULL;
--static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value)
--    vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
--    src->reg.u.immconst_u32[0] = value;
-+    vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0);
-+    rel_addr->swizzle = 0;
-+    rel_addr->modifiers = 0;
--static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type)
--    vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1);
--    src->reg.idx[0].offset = idx;
-+    instructions->outpointid_param = rel_addr;
-+    return rel_addr;
- }
--void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
--        enum vkd3d_shader_opcode opcode)
--    memset(ins, 0, sizeof(*ins));
--    ins->location = *location;
--    ins->opcode = opcode;
--static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins,
--        const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program)
--    struct vkd3d_shader_src_param *src_param;
--    if (!(src_param = vsir_program_get_src_params(program, 1)))
--        return false;
--    vsir_src_param_init_label(src_param, label_id);
--    vsir_instruction_init(ins, location, VKD3DSIH_LABEL);
--    ins->src = src_param;
--    ins->src_count = 1;
--    return true;
--static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions)
--    struct hull_flattener flattener = {*src_instructions};
--    struct vkd3d_shader_instruction_array *instructions;
--    struct shader_phase_location_array locations;
--    enum vkd3d_result result = VKD3D_OK;
--    unsigned int i;
--    instructions = &flattener.instructions;
--    flattener.phase = VKD3DSIH_INVALID;
--    for (i = 0, locations.count = 0; i < instructions->count; ++i)
--        flattener_eliminate_phase_related_dcls(&flattener, i, &locations);
--    if ((result = flattener_flatten_phases(&flattener, &locations)) < 0)
--        return result;
--    if (flattener.phase != VKD3DSIH_INVALID)
--    {
--        if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1))
--            return VKD3D_ERROR_OUT_OF_MEMORY;
--        vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET);
--    }
--    *src_instructions = flattener.instructions;
--    return result;
--struct control_point_normaliser
--    struct vkd3d_shader_instruction_array instructions;
--    enum vkd3d_shader_opcode phase;
--    struct vkd3d_shader_src_param *outpointid_param;
--static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser)
--    return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
--struct vkd3d_shader_src_param *instruction_array_create_outpointid_param(
--        struct vkd3d_shader_instruction_array *instructions)
--    struct vkd3d_shader_src_param *rel_addr;
--    if (instructions->outpointid_param)
--        return instructions->outpointid_param;
--    if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1)))
--        return NULL;
--    vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0);
--    rel_addr->swizzle = 0;
--    rel_addr->modifiers = 0;
--    instructions->outpointid_param = rel_addr;
--    return rel_addr;
--static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param,
--        struct control_point_normaliser *normaliser)
-+static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param,
-+        struct control_point_normaliser *normaliser)
- {
-     struct vkd3d_shader_register *reg = &dst_param->reg;
-@@ -991,7 +1433,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p
- }
- static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io(
--        struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature)
-+        struct vsir_program *program, struct vsir_transformation_context *ctx)
- {
-     struct vkd3d_shader_instruction_array *instructions;
-     struct control_point_normaliser normaliser;
-@@ -1001,12 +1443,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i
-     enum vkd3d_result ret;
-     unsigned int i, j;
--    if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions)))
-+    VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED);
-+    if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
-+    {
-+        program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
-+        return VKD3D_OK;
-+    }
-+    if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program)))
-     {
-         ERR("Failed to allocate src param.\n");
-         return VKD3D_ERROR_OUT_OF_MEMORY;
-     }
--    normaliser.instructions = *src_instructions;
-+    normaliser.instructions = program->instructions;
-     instructions = &normaliser.instructions;
-     normaliser.phase = VKD3DSIH_INVALID;
-@@ -1043,22 +1493,25 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i
-                 input_control_point_count = ins->declaration.count;
-                 break;
--                *src_instructions = normaliser.instructions;
-+                program->instructions = normaliser.instructions;
-+                program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
-                 return VKD3D_OK;
-             case VKD3DSIH_HS_FORK_PHASE:
-             case VKD3DSIH_HS_JOIN_PHASE:
-                 /* ins may be relocated if the instruction array expands. */
-                 location = ins->location;
--                ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature,
-+                ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature,
-                         input_control_point_count, i, &location);
--                *src_instructions = normaliser.instructions;
-+                program->instructions = normaliser.instructions;
-+                program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
-                 return ret;
-             default:
-                 break;
-         }
-     }
--    *src_instructions = normaliser.instructions;
-+    program->instructions = normaliser.instructions;
-+    program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
-     return VKD3D_OK;
- }
-@@ -1098,36 +1551,35 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *
-     return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
- }
--static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature,
--        unsigned int reg_idx, unsigned int write_mask)
-+static bool shader_signature_find_element_for_reg(const struct shader_signature *signature,
-+        unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx)
- {
--    unsigned int i, base_write_mask;
-+    const struct signature_element *e;
-+    unsigned int i;
-     for (i = 0; i < signature->element_count; ++i)
-     {
--        struct signature_element *e = &signature->elements[i];
--        if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx
-+        e = &signature->elements[i];
-+        if (e->register_index <= reg_idx && e->register_count > reg_idx - e->register_index
-                 && (e->mask & write_mask) == write_mask)
-         {
--            return i;
-+            *element_idx = i;
-+            return true;
-         }
-     }
--    /* Validated in the TPF reader, but failure in signature_element_range_expand_mask()
--     * can land us here on an unmatched vector mask. */
--    FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n",
--            reg_idx, write_mask);
--    base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask);
--    if (base_write_mask != write_mask)
--        return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask);
--    vkd3d_unreachable();
-+    return false;
- }
- struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature,
-         unsigned int reg_idx, unsigned int write_mask)
- {
--    return &signature->elements[shader_signature_find_element_for_reg(signature, reg_idx, write_mask)];
-+    unsigned int element_idx;
-+    if (shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx))
-+        return &signature->elements[element_idx];
-+    return NULL;
- }
- static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE],
-@@ -1181,9 +1633,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser,
- {
-     const struct vkd3d_shader_index_range *range = &ins->declaration.index_range;
-     const struct vkd3d_shader_register *reg = &range->dst.reg;
--    unsigned int reg_idx, write_mask, element_idx;
-     const struct shader_signature *signature;
-     uint8_t (*range_map)[VKD3D_VEC4_SIZE];
-+    struct signature_element *element;
-+    unsigned int reg_idx, write_mask;
-     switch (reg->type)
-     {
-@@ -1215,9 +1668,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser,
-     reg_idx = reg->idx[reg->idx_count - 1].offset;
-     write_mask = range->dst.write_mask;
--    element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
--    range_map_set_register_range(range_map, reg_idx, range->register_count,
--            signature->elements[element_idx].mask, true);
-+    element = vsir_signature_find_element_for_reg(signature, reg_idx, write_mask);
-+    range_map_set_register_range(range_map, reg_idx, range->register_count, element->mask, true);
- }
- static int signature_element_mask_compare(const void *a, const void *b)
-@@ -1276,8 +1728,34 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa
- static int signature_element_register_compare(const void *a, const void *b)
- {
-     const struct signature_element *e = a, *f = b;
-+    int ret;
-+    if ((ret = vkd3d_u32_compare(e->register_index, f->register_index)))
-+        return ret;
--    return vkd3d_u32_compare(e->register_index, f->register_index);
-+    /* System values like SV_RenderTargetArrayIndex and SV_ViewPortArrayIndex
-+     * can get packed into the same I/O register as non-system values, but
-+     * only at the end. E.g.:
-+     *
-+     *     vs_4_0
-+     *     ...
-+     *     .output
-+     *     ...
-+     *     .param B.x, o1.x, uint
-+     *     .param C.y, o1.y, uint
-+     *     .param SV_RenderTargetArrayIndex.z, o1.z, uint, RTINDEX
-+     *     .text
-+     *     ...
-+     *     mov o1.xy, v1.xyxx
-+     *     mov o1.z, v1.z
-+     *     ret
-+     *
-+     * Because I/O normalisation doesn't split writes like the mov to o1.xy
-+     * above, we want to make sure that o1.x and o1.y continue to be packed
-+     * into a single register after I/O normalisation, so we order system
-+     * values after non-system values here, allowing the non-system values to
-+     * get merged into a single register. */
-+    return vkd3d_u32_compare(f->sysval_semantic, e->sysval_semantic);
- }
- static int signature_element_index_compare(const void *a, const void *b)
-@@ -1345,6 +1823,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map
-         return false;
-     memcpy(elements, s->elements, element_count * sizeof(*elements));
-+    for (i = 0; i < element_count; ++i)
-+        elements[i].sort_index = i;
-     qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare);
-     for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e)
-@@ -1388,6 +1869,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map
-                 else
-                     e->interpolation_mode = f->interpolation_mode;
-             }
-+            vkd3d_free((void *)f->semantic_name);
-         }
-     }
-     element_count = new_count;
-@@ -1415,6 +1898,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map
-             TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count);
-             e->register_count = register_count;
-             e->mask = signature_element_range_expand_mask(e, register_count, range_map);
-+            for (j = 1; j < register_count; ++j)
-+            {
-+                f = &elements[i + j];
-+                vkd3d_free((void *)f->semantic_name);
-+            }
-         }
-     }
-     element_count = new_count;
-@@ -1470,6 +1959,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
-     const struct shader_signature *signature;
-     const struct signature_element *e;
-+    write_mask = dst_param->write_mask;
-     switch (reg->type)
-     {
-         case VKD3DSPR_OUTPUT:
-@@ -1518,10 +2009,17 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
-             break;
-         case VKD3DSPR_RASTOUT:
-+            /* Leave point size as a system value for the backends to consume. */
-+            if (reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE)
-+                return true;
-             reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset;
-             signature = normaliser->output_signature;
-             reg->type = VKD3DSPR_OUTPUT;
-             dcl_params = normaliser->output_dcl_params;
-+            /* Fog and point size are scalar, but fxc/d3dcompiler emits a full
-+             * write mask when writing to them. */
-+            if (reg->idx[0].offset > 0)
-+                write_mask = VKD3DSP_WRITEMASK_0;
-             break;
-         default:
-@@ -1529,11 +2027,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
-     }
-     id_idx = reg->idx_count - 1;
--    write_mask = dst_param->write_mask;
--    element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
-+    if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx))
-+        vkd3d_unreachable();
-     e = &signature->elements[element_idx];
--    dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask);
-     if (is_io_dcl)
-     {
-         /* Validated in the TPF reader. */
-@@ -1653,7 +2150,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par
-     id_idx = reg->idx_count - 1;
-     write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0);
--    element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
-+    if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx))
-+        vkd3d_unreachable();
-     e = &signature->elements[element_idx];
-     if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic)))
-@@ -1725,38 +2223,14 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi
-     }
- }
--static bool use_flat_interpolation(const struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
--    static const struct vkd3d_shader_location no_loc;
--    const struct vkd3d_shader_parameter1 *parameter;
--    if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION)))
--        return false;
--    {
--        vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
--                "Unsupported flat interpolation parameter type %#x.\n", parameter->type);
--        return false;
--    }
--    if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
--    {
--        vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
--                "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type);
--        return false;
--    }
--    return parameter->u.immediate_constant.u.u32;
- static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
-+        struct vsir_transformation_context *ctx)
- {
-     struct io_normaliser normaliser = {program->instructions};
-     struct vkd3d_shader_instruction *ins;
--    bool has_control_point_phase;
--    unsigned int i, j;
-+    unsigned int i;
-+    VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO);
-     normaliser.phase = VKD3DSIH_INVALID;
-     normaliser.shader_type = program->shader_version.type;
-@@ -1765,7 +2239,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
-     normaliser.output_signature = &program->output_signature;
-     normaliser.patch_constant_signature = &program->patch_constant_signature;
--    for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i)
-+    for (i = 0; i < program->instructions.count; ++i)
-     {
-         ins = &program->instructions.elements[i];
-@@ -1779,8 +2253,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
-                 vkd3d_shader_instruction_make_nop(ins);
-                 break;
--                has_control_point_phase = true;
--                /* fall through */
-             case VKD3DSIH_HS_FORK_PHASE:
-             case VKD3DSIH_HS_JOIN_PHASE:
-                 normaliser.phase = ins->opcode;
-@@ -1790,22 +2262,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
-         }
-     }
--    if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase)
--    {
--        /* Inputs and outputs must match for the default phase, so merge ranges must match too. */
--        for (i = 0; i < MAX_REG_OUTPUT; ++i)
--        {
--            for (j = 0; j < VKD3D_VEC4_SIZE; ++j)
--            {
--                if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j])
--                    normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j];
--                else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j])
--                    normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j];
--                else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]);
--            }
--        }
--    }
-     if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false)
-             || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false)
-             || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true))
-@@ -1814,24 +2270,13 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
-         return VKD3D_ERROR_OUT_OF_MEMORY;
-     }
--    if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL
--            && program->shader_version.major < 4 && use_flat_interpolation(program, message_context))
--    {
--        for (i = 0; i < program->input_signature.element_count; ++i)
--        {
--            struct signature_element *element = &program->input_signature.elements[i];
--            if (!ascii_strcasecmp(element->semantic_name, "COLOR"))
--                element->interpolation_mode = VKD3DSIM_CONSTANT;
--        }
--    }
-     normaliser.phase = VKD3DSIH_INVALID;
-     for (i = 0; i < normaliser.instructions.count; ++i)
-         shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser);
-     program->instructions = normaliser.instructions;
-     program->use_vocp = normaliser.use_vocp;
-+    program->normalisation_level = VSIR_FULLY_NORMALISED_IO;
-     return VKD3D_OK;
- }
-@@ -1918,7 +2363,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par
-     param->reg.idx_count = 3;
- }
--static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_program *program)
-+static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
-     struct flat_constants_normaliser normaliser = {0};
-     unsigned int i, j;
-@@ -1957,7 +2403,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_
-     return VKD3D_OK;
- }
--static void remove_dead_code(struct vsir_program *program)
-+static enum vkd3d_result vsir_program_remove_dead_code(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
-     size_t i, depth = 0;
-     bool dead = false;
-@@ -2045,103 +2492,6 @@ static void remove_dead_code(struct vsir_program *program)
-                 break;
-         }
-     }
--static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
--    unsigned int i;
--    for (i = 0; i < program->instructions.count; ++i)
--    {
--        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
--        struct vkd3d_shader_src_param *srcs;
--        switch (ins->opcode)
--        {
--            case VKD3DSIH_TEX:
--                if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3)))
--                    return VKD3D_ERROR_OUT_OF_MEMORY;
--                memset(srcs, 0, sizeof(*srcs) * 3);
--                ins->opcode = VKD3DSIH_SAMPLE;
--                srcs[0] = ins->src[0];
--                srcs[1].reg.type = VKD3DSPR_RESOURCE;
--                srcs[1].reg.idx[0] = ins->src[1].reg.idx[0];
--                srcs[1].reg.idx[1] = ins->src[1].reg.idx[0];
--                srcs[1].reg.idx_count = 2;
--                srcs[1].reg.data_type = VKD3D_DATA_RESOURCE;
--                srcs[1].reg.dimension = VSIR_DIMENSION_VEC4;
--                srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
--                srcs[2].reg.type = VKD3DSPR_SAMPLER;
--                srcs[2].reg.idx[0] = ins->src[1].reg.idx[0];
--                srcs[2].reg.idx[1] = ins->src[1].reg.idx[0];
--                srcs[2].reg.idx_count = 2;
--                srcs[2].reg.data_type = VKD3D_DATA_SAMPLER;
--                ins->src = srcs;
--                ins->src_count = 3;
--                break;
--            case VKD3DSIH_TEXLDD:
--                if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5)))
--                    return VKD3D_ERROR_OUT_OF_MEMORY;
--                memset(srcs, 0, sizeof(*srcs) * 5);
--                ins->opcode = VKD3DSIH_SAMPLE_GRAD;
--                srcs[0] = ins->src[0];
--                srcs[1].reg.type = VKD3DSPR_RESOURCE;
--                srcs[1].reg.idx[0] = ins->src[1].reg.idx[0];
--                srcs[1].reg.idx[1] = ins->src[1].reg.idx[0];
--                srcs[1].reg.idx_count = 2;
--                srcs[1].reg.data_type = VKD3D_DATA_RESOURCE;
--                srcs[1].reg.dimension = VSIR_DIMENSION_VEC4;
--                srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
--                srcs[2].reg.type = VKD3DSPR_SAMPLER;
--                srcs[2].reg.idx[0] = ins->src[1].reg.idx[0];
--                srcs[2].reg.idx[1] = ins->src[1].reg.idx[0];
--                srcs[2].reg.idx_count = 2;
--                srcs[2].reg.data_type = VKD3D_DATA_SAMPLER;
--                srcs[3] = ins->src[2];
--                srcs[4] = ins->src[3];
--                ins->src = srcs;
--                ins->src_count = 5;
--                break;
--            case VKD3DSIH_TEXBEM:
--            case VKD3DSIH_TEXBEML:
--            case VKD3DSIH_TEXCOORD:
--            case VKD3DSIH_TEXDEPTH:
--            case VKD3DSIH_TEXDP3:
--            case VKD3DSIH_TEXDP3TEX:
--            case VKD3DSIH_TEXLDL:
--            case VKD3DSIH_TEXM3x2PAD:
--            case VKD3DSIH_TEXM3x2TEX:
--            case VKD3DSIH_TEXM3x3DIFF:
--            case VKD3DSIH_TEXM3x3PAD:
--            case VKD3DSIH_TEXM3x3SPEC:
--            case VKD3DSIH_TEXM3x3TEX:
--            case VKD3DSIH_TEXM3x3VSPEC:
--            case VKD3DSIH_TEXREG2AR:
--            case VKD3DSIH_TEXREG2GB:
--            case VKD3DSIH_TEXREG2RGB:
--                vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
--                        "Aborting due to not yet implemented feature: "
--                        "Combined sampler instruction %#x.", ins->opcode);
--                return VKD3D_ERROR_NOT_IMPLEMENTED;
--            default:
--                break;
--        }
--    }
-     return VKD3D_OK;
- }
-@@ -2434,15 +2784,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla
- static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener,
-         struct vkd3d_shader_message_context *message_context)
- {
--    bool main_block_open, is_hull_shader, after_declarations_section;
-     struct vkd3d_shader_instruction_array *instructions;
-     struct vsir_program *program = flattener->program;
-+    bool is_hull_shader, after_declarations_section;
-     struct vkd3d_shader_instruction *dst_ins;
-     size_t i;
-     instructions = &program->instructions;
-     is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL;
--    main_block_open = !is_hull_shader;
-     after_declarations_section = is_hull_shader;
-     if (!cf_flattener_require_space(flattener, instructions->count + 1))
-@@ -2766,8 +3115,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
-                 if (cf_info)
-                     cf_info->inside_block = false;
--                else
--                    main_block_open = false;
-                 break;
-             default:
-@@ -2777,23 +3124,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
-         }
-     }
--    if (main_block_open)
--    {
--        if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
--            return VKD3D_ERROR_OUT_OF_MEMORY;
--        vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET);
--        ++flattener->instruction_count;
--    }
-     return flattener->status;
- }
- static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
-+        struct vsir_transformation_context *ctx)
- {
-+    struct vkd3d_shader_message_context *message_context = ctx->message_context;
-     struct cf_flattener flattener = {.program = program};
-     enum vkd3d_result result;
-+    VKD3D_ASSERT(program->cf_type == VSIR_CF_STRUCTURED);
-     if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0)
-     {
-         vkd3d_free(program->instructions.elements);
-@@ -2801,6 +3143,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi
-         program->instructions.capacity = flattener.instruction_capacity;
-         program->instructions.count = flattener.instruction_count;
-         program->block_count = flattener.block_id;
-+        program->cf_type = VSIR_CF_BLOCKS;
-     }
-     else
-     {
-@@ -2860,13 +3203,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i
-     return true;
- }
--static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program)
-+static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
-     unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label;
-     size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0;
-     struct vkd3d_shader_instruction *instructions = NULL;
-     struct lower_switch_to_if_ladder_block_mapping *block_map = NULL;
-+    VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
-     if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count))
-         goto fail;
-@@ -3050,7 +3396,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl
-     vkd3d_free(block_info);
- }
--static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program)
-+static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
-     size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i;
-     struct ssas_to_temps_block_info *info, *block_info = NULL;
-@@ -3058,6 +3405,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_
-     struct ssas_to_temps_alloc alloc = {0};
-     unsigned int current_label = 0;
-+    VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
-     if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
-     {
-         ERR("Failed to allocate block info array.\n");
-@@ -5271,12 +5620,15 @@ out:
- }
- static enum vkd3d_result vsir_program_structurize(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
-+        struct vsir_transformation_context *ctx)
- {
-+    struct vkd3d_shader_message_context *message_context = ctx->message_context;
-     struct vsir_cfg_emit_target target = {0};
-     enum vkd3d_result ret;
-     size_t i;
-+    VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
-     target.jump_target_temp_idx = program->temp_count;
-     target.temp_count = program->temp_count + 1;
-@@ -5324,6 +5676,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program,
-     program->instructions.capacity = target.ins_capacity;
-     program->instructions.count = target.ins_count;
-     program->temp_count = target.temp_count;
-+    program->cf_type = VSIR_CF_STRUCTURED;
-     return VKD3D_OK;
-@@ -5451,11 +5804,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f
- }
- static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
-+        struct vsir_transformation_context *ctx)
- {
-+    struct vkd3d_shader_message_context *message_context = ctx->message_context;
-     enum vkd3d_result ret;
-     size_t i;
-+    VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
-     for (i = 0; i < program->instructions.count;)
-     {
-         struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-@@ -5491,26 +5847,67 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru
-     return VKD3D_OK;
- }
--static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index)
-+static bool use_flat_interpolation(const struct vsir_program *program,
-+        struct vkd3d_shader_message_context *message_context, bool *flat)
- {
--    for (unsigned int i = 0; i < signature->element_count; ++i)
-+    static const struct vkd3d_shader_location no_loc;
-+    const struct vkd3d_shader_parameter1 *parameter;
-+    *flat = false;
-+    if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION)))
-+        return true;
-     {
--        if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET
--                && !signature->elements[i].register_index)
--        {
--            *index = i;
--            return true;
--        }
-+        vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
-+                "Unsupported flat interpolation parameter type %#x.", parameter->type);
-+        return false;
-+    }
-+    if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
-+    {
-+        vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid flat interpolation parameter data type %#x.", parameter->data_type);
-+        return false;
-     }
--    return false;
-+    *flat = parameter->u.immediate_constant.u.u32;
-+    return true;
-+static enum vkd3d_result vsir_program_apply_flat_interpolation(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
-+    unsigned int i;
-+    bool flat;
-+    if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL || program->shader_version.major >= 4)
-+        return VKD3D_OK;
-+    if (!use_flat_interpolation(program, ctx->message_context, &flat))
-+    if (!flat)
-+        return VKD3D_OK;
-+    for (i = 0; i < program->input_signature.element_count; ++i)
-+    {
-+        struct signature_element *element = &program->input_signature.elements[i];
-+        if (!ascii_strcasecmp(element->semantic_name, "COLOR"))
-+            element->interpolation_mode = VKD3DSIM_CONSTANT;
-+    }
-+    return VKD3D_OK;
- }
- static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program,
-         const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func,
--        const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos)
-+        const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx,
-+        uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context)
- {
-     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    static const struct vkd3d_shader_location no_loc;
-     size_t pos = ret - instructions->elements;
-     struct vkd3d_shader_instruction *ins;
-@@ -5565,6 +5962,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
-             break;
-+            vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER,
-+                    "Alpha test reference data type must be a single component.");
-+            return VKD3D_ERROR_INVALID_ARGUMENT;
-         default:
-             FIXME("Unhandled parameter data type %#x.\n", ref->data_type);
-             return VKD3D_ERROR_NOT_IMPLEMENTED;
-@@ -5596,12 +5998,13 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
- }
- static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program,
--        struct vkd3d_shader_message_context *message_context)
-+        struct vsir_transformation_context *ctx)
- {
-+    struct vkd3d_shader_message_context *message_context = ctx->message_context;
-     const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL;
-+    uint32_t colour_signature_idx, colour_temp = ~0u;
-     static const struct vkd3d_shader_location no_loc;
-     enum vkd3d_shader_comparison_func compare_func;
--    uint32_t colour_signature_idx, colour_temp;
-     struct vkd3d_shader_instruction *ins;
-     size_t new_pos;
-     int ret;
-@@ -5609,7 +6012,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro
-     if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
-         return VKD3D_OK;
--    if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx)
-+    if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)
-             || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3))
-         return VKD3D_OK;
-@@ -5620,13 +6023,13 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro
-     {
-         vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
--                "Unsupported alpha test function parameter type %#x.\n", func->type);
-+                "Unsupported alpha test function parameter type %#x.", func->type);
-     }
-     if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
-     {
-         vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
--                "Invalid alpha test function parameter data type %#x.\n", func->data_type);
-+                "Invalid alpha test function parameter data type %#x.", func->data_type);
-     }
-     compare_func = func->u.immediate_constant.u.u32;
-@@ -5650,7 +6053,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro
-         if (ins->opcode == VKD3DSIH_RET)
-         {
-             if ((ret = insert_alpha_test_before_ret(program, ins, compare_func,
--                    ref, colour_signature_idx, colour_temp, &new_pos)) < 0)
-+                    ref, colour_signature_idx, colour_temp, &new_pos, message_context)) < 0)
-                 return ret;
-             i = new_pos;
-             continue;
-@@ -5677,456 +6080,2081 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro
-     return VKD3D_OK;
- }
--struct validation_context
-+static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *program,
-+        const struct vkd3d_shader_instruction *ret, uint32_t mask, uint32_t position_signature_idx,
-+        uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos)
- {
--    struct vkd3d_shader_message_context *message_context;
--    const struct vsir_program *program;
--    size_t instruction_idx;
--    struct vkd3d_shader_location null_location;
--    bool invalid_instruction_idx;
--    enum vkd3d_result status;
--    bool dcl_temps_found;
--    enum vkd3d_shader_opcode phase;
--    enum cf_type
--    {
--        CF_TYPE_UNKNOWN = 0,
--        CF_TYPE_BLOCKS,
--    } cf_type;
--    bool inside_block;
--    struct validation_context_temp_data
--    {
--        enum vsir_dimension dimension;
--        size_t first_seen;
--    } *temps;
-+    struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    size_t pos = ret - instructions->elements;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int output_idx = 0;
--    struct validation_context_ssa_data
--    {
--        enum vsir_dimension dimension;
--        enum vkd3d_data_type data_type;
--        size_t first_seen;
--        uint32_t write_mask;
--        uint32_t read_mask;
--        size_t first_assigned;
--    } *ssas;
-+    if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
--    enum vkd3d_shader_opcode *blocks;
--    size_t depth;
--    size_t blocks_capacity;
-+    ins = &program->instructions.elements[pos];
--static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx,
--        enum vkd3d_shader_error error, const char *format, ...)
--    struct vkd3d_string_buffer buf;
--    va_list args;
-+    for (unsigned int i = 0; i < 8; ++i)
-+    {
-+        if (!(mask & (1u << i)))
-+            continue;
--    vkd3d_string_buffer_init(&buf);
-+        vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2);
-+        src_param_init_temp_float4(&ins->src[0], position_temp);
-+        src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT);
-+        ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+        ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
--    va_start(args, format);
--    vkd3d_string_buffer_vprintf(&buf, format, args);
--    va_end(args);
-+        vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
-+        if (output_idx < 4)
-+            ins->dst[0].reg.idx[0].offset = low_signature_idx;
-+        else
-+            ins->dst[0].reg.idx[0].offset = high_signature_idx;
-+        ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+        ins->dst[0].write_mask = (1u << (output_idx % 4));
-+        ++output_idx;
--    if (ctx->invalid_instruction_idx)
--    {
--        vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer);
--        ERR("VSIR validation error: %s\n", buf.buffer);
--    }
--    else
--    {
--        const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx];
--        vkd3d_shader_error(ctx->message_context, &ins->location, error,
--                "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer);
--        ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer);
-+        ++ins;
-     }
--    vkd3d_string_buffer_cleanup(&buf);
-+    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
-+    vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
-+    ins->dst[0].reg.idx[0].offset = position_signature_idx;
-+    ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+    ins->dst[0].write_mask = program->output_signature.elements[position_signature_idx].mask;
-+    src_param_init_temp_float(&ins->src[0], position_temp);
-+    ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+    ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE;
--    if (!ctx->status)
--        ctx->status = VKD3D_ERROR_INVALID_SHADER;
-+    *ret_pos = pos + vkd3d_popcount(mask) + 1;
-+    return VKD3D_OK;
- }
--static void vsir_validate_src_param(struct validation_context *ctx,
--        const struct vkd3d_shader_src_param *src);
--static void vsir_validate_register(struct validation_context *ctx,
--        const struct vkd3d_shader_register *reg)
-+static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
--    unsigned int i;
--    if (reg->type >= VKD3DSPR_COUNT)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.",
--                reg->type);
-+    struct shader_signature *signature = &program->output_signature;
-+    unsigned int low_signature_idx = ~0u, high_signature_idx = ~0u;
-+    const struct vkd3d_shader_parameter1 *mask_parameter = NULL;
-+    struct signature_element *new_elements, *clip_element;
-+    uint32_t position_signature_idx, position_temp, mask;
-+    static const struct vkd3d_shader_location no_loc;
-+    struct vkd3d_shader_instruction *ins;
-+    unsigned int plane_count;
-+    size_t new_pos;
-+    int ret;
--    if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.",
--                reg->precision);
-+    if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX)
-+        return VKD3D_OK;
--    if (reg->data_type >= VKD3D_DATA_COUNT)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.",
--                reg->data_type);
-+    for (unsigned int i = 0; i < program->parameter_count; ++i)
-+    {
-+        const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i];
--    if (reg->dimension >= VSIR_DIMENSION_COUNT)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.",
--                reg->dimension);
-+        if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK)
-+            mask_parameter = parameter;
-+    }
--    if (reg->idx_count > ARRAY_SIZE(reg->idx))
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.",
--                reg->idx_count);
-+    if (!mask_parameter)
-+        return VKD3D_OK;
--    for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i)
-+    if (mask_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT)
-     {
--        const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr;
--        if (reg->idx[i].rel_addr)
--            vsir_validate_src_param(ctx, param);
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
-+                "Unsupported clip plane mask parameter type %#x.", mask_parameter->type);
-     }
-+    if (mask_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
-+    {
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid clip plane mask parameter data type %#x.", mask_parameter->data_type);
-+    }
-+    mask = mask_parameter->u.immediate_constant.u.u32;
--    switch (reg->type)
-+    if (!mask)
-+        return VKD3D_OK;
-+    for (unsigned int i = 0; i < signature->element_count; ++i)
-     {
--        case VKD3DSPR_TEMP:
-+        if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_CLIP_DISTANCE)
-         {
--            struct validation_context_temp_data *data;
-+            vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER,
-+                    "Clip planes cannot be used if the shader writes clip distance.");
-+            return VKD3D_ERROR_INVALID_ARGUMENT;
-+        }
-+    }
--            if (reg->idx_count != 1)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.",
--                        reg->idx_count);
--                break;
--            }
-+    if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx))
-+    {
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC,
-+                "Shader does not write position.");
-+        return VKD3D_ERROR_INVALID_SHADER;
-+    }
--            if (reg->idx[0].rel_addr)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register.");
-+    /* Append the clip plane signature indices. */
--            if (reg->idx[0].offset >= ctx->program->temp_count)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.",
--                        reg->idx[0].offset, ctx->program->temp_count);
--                break;
--            }
-+    plane_count = vkd3d_popcount(mask);
--            data = &ctx->temps[reg->idx[0].offset];
-+    if (!(new_elements = vkd3d_realloc(signature->elements,
-+            (signature->element_count + 2) * sizeof(*signature->elements))))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    signature->elements = new_elements;
-+    low_signature_idx = signature->element_count;
-+    clip_element = &signature->elements[signature->element_count++];
-+    memset(clip_element, 0, sizeof(*clip_element));
-+    clip_element->sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE;
-+    clip_element->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+    clip_element->register_count = 1;
-+    clip_element->mask = vkd3d_write_mask_from_component_count(min(plane_count, 4));
-+    clip_element->used_mask = clip_element->mask;
-+    clip_element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE;
-+    if (plane_count > 4)
-+    {
-+        high_signature_idx = signature->element_count;
-+        clip_element = &signature->elements[signature->element_count++];
-+        memset(clip_element, 0, sizeof(*clip_element));
-+        clip_element->sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE;
-+        clip_element->semantic_index = 1;
-+        clip_element->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
-+        clip_element->register_count = 1;
-+        clip_element->mask = vkd3d_write_mask_from_component_count(plane_count - 4);
-+        clip_element->used_mask = clip_element->mask;
-+        clip_element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE;
-+    }
-+    /* We're going to be reading from the output position, so we need to go
-+     * through the whole shader and convert it to a temp. */
--            if (reg->dimension == VSIR_DIMENSION_NONE)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register.");
--                break;
--            }
-+    position_temp = program->temp_count++;
--            /* TEMP registers can be scalar or vec4, provided that
--             * each individual register always appears with the same
--             * dimension. */
--            if (data->dimension == VSIR_DIMENSION_NONE)
--            {
--                data->dimension = reg->dimension;
--                data->first_seen = ctx->instruction_idx;
--            }
--            else if (data->dimension != reg->dimension)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: "
--                        "it has already been seen with dimension %#x at instruction %zu.",
--                        reg->dimension, data->dimension, data->first_seen);
--            }
--            break;
-+    for (size_t i = 0; i < program->instructions.count; ++i)
-+    {
-+        ins = &program->instructions.elements[i];
-+        if (vsir_instruction_is_dcl(ins))
-+            continue;
-+        if (ins->opcode == VKD3DSIH_RET)
-+        {
-+            if ((ret = insert_clip_planes_before_ret(program, ins, mask, position_signature_idx,
-+                    position_temp, low_signature_idx, high_signature_idx, &new_pos)) < 0)
-+                return ret;
-+            i = new_pos;
-+            continue;
-         }
--        case VKD3DSPR_SSA:
-+        for (size_t j = 0; j < ins->dst_count; ++j)
-         {
--            struct validation_context_ssa_data *data;
-+            struct vkd3d_shader_dst_param *dst = &ins->dst[j];
--            if (reg->idx_count != 1)
-+            /* Note we run after I/O normalization. */
-+            if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == position_signature_idx)
-             {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.",
--                        reg->idx_count);
--                break;
-+                dst->reg.type = VKD3DSPR_TEMP;
-+                dst->reg.idx[0].offset = position_temp;
-             }
-+        }
-+    }
--            if (reg->idx[0].rel_addr)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register.");
-+    return VKD3D_OK;
--            if (reg->idx[0].offset >= ctx->program->ssa_count)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
--                        "SSA register index %u exceeds the maximum count %u.",
--                        reg->idx[0].offset, ctx->program->ssa_count);
--                break;
--            }
-+static bool is_pre_rasterization_shader(enum vkd3d_shader_type type)
-+    return type == VKD3D_SHADER_TYPE_VERTEX
-+            || type == VKD3D_SHADER_TYPE_HULL
-+            || type == VKD3D_SHADER_TYPE_DOMAIN
-+            || type == VKD3D_SHADER_TYPE_GEOMETRY;
--            data = &ctx->ssas[reg->idx[0].offset];
-+static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program,
-+        const struct vkd3d_shader_instruction *ret, size_t *ret_pos)
-+    struct vkd3d_shader_instruction_array *instructions = &program->instructions;
-+    size_t pos = ret - instructions->elements;
-+    struct vkd3d_shader_instruction *ins;
--            if (reg->dimension == VSIR_DIMENSION_NONE)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register.");
--                break;
--            }
-+    if (!shader_instruction_array_insert_at(&program->instructions, pos, 1))
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
--            /* SSA registers can be scalar or vec4, provided that each
--             * individual register always appears with the same
--             * dimension. */
--            if (data->dimension == VSIR_DIMENSION_NONE)
--            {
--                data->dimension = reg->dimension;
--                data->data_type = reg->data_type;
--                data->first_seen = ctx->instruction_idx;
--            }
--            else
--            {
--                if (data->dimension != reg->dimension)
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: "
--                            "it has already been seen with dimension %#x at instruction %zu.",
--                            reg->dimension, data->dimension, data->first_seen);
--                if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type))
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: "
--                            "it has already been seen with data type %#x at instruction %zu.",
--                            reg->data_type, data->data_type, data->first_seen);
--            }
--            break;
--        }
-+    ins = &program->instructions.elements[pos];
--        case VKD3DSPR_LABEL:
--            if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.",
--                        reg->precision);
-+    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
-+    vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
-+    ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
-+    src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT);
--            if (reg->data_type != VKD3D_DATA_UNUSED)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.",
--                        reg->data_type);
-+    *ret_pos = pos + 1;
-+    return VKD3D_OK;
--            if (reg->dimension != VSIR_DIMENSION_NONE)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.",
--                        reg->dimension);
-+static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
-+    const struct vkd3d_shader_parameter1 *size_parameter = NULL;
-+    static const struct vkd3d_shader_location no_loc;
--            if (reg->idx_count != 1)
--            {
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.",
--                        reg->idx_count);
--                break;
--            }
-+    if (program->has_point_size)
-+        return VKD3D_OK;
--            if (reg->idx[0].rel_addr)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register.");
--            /* Index == 0 is invalid, but it is temporarily allowed
--             * for intermediate stages. Once we support validation
--             * dialects we can selectively check for that. */
--            if (reg->idx[0].offset > ctx->program->block_count)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
--                        "LABEL register index %u exceeds the maximum count %u.",
--                        reg->idx[0].offset, ctx->program->block_count);
--            break;
-+    if (!is_pre_rasterization_shader(program->shader_version.type))
-+        return VKD3D_OK;
--        case VKD3DSPR_NULL:
--            if (reg->idx_count != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.",
--                        reg->idx_count);
--            break;
-+    for (unsigned int i = 0; i < program->parameter_count; ++i)
-+    {
-+        const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i];
--        case VKD3DSPR_IMMCONST:
--            if (reg->idx_count != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.",
--                        reg->idx_count);
--            break;
-+        if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE)
-+            size_parameter = parameter;
-+    }
--        case VKD3DSPR_IMMCONST64:
--            if (reg->idx_count != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.",
--                        reg->idx_count);
--            break;
-+    if (!size_parameter)
-+        return VKD3D_OK;
--        default:
--            break;
-+    if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32)
-+    {
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid point size parameter data type %#x.", size_parameter->data_type);
-+    }
-+    program->has_point_size = true;
-+    /* Append a point size write before each ret. */
-+    for (size_t i = 0; i < program->instructions.count; ++i)
-+    {
-+        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-+        if (ins->opcode == VKD3DSIH_RET)
-+        {
-+            size_t new_pos;
-+            int ret;
-+            if ((ret = insert_point_size_before_ret(program, ins, &new_pos)) < 0)
-+                return ret;
-+            i = new_pos;
-+        }
-     }
-+    return VKD3D_OK;
- }
--static void vsir_validate_dst_param(struct validation_context *ctx,
--        const struct vkd3d_shader_dst_param *dst)
-+static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
- {
--    vsir_validate_register(ctx, &dst->reg);
-+    const struct vkd3d_shader_parameter1 *min_parameter = NULL, *max_parameter = NULL;
-+    static const struct vkd3d_shader_location no_loc;
--    if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.",
--                dst->write_mask);
-+    if (!program->has_point_size)
-+        return VKD3D_OK;
--    switch (dst->reg.dimension)
--    {
--            if (dst->write_mask != VKD3DSP_WRITEMASK_0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.",
--                    dst->write_mask);
--            break;
-+    if (!is_pre_rasterization_shader(program->shader_version.type))
-+        return VKD3D_OK;
--        case VSIR_DIMENSION_VEC4:
--            if (dst->write_mask == 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask.");
--            break;
-+    for (unsigned int i = 0; i < program->parameter_count; ++i)
-+    {
-+        const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i];
--        default:
--            if (dst->write_mask != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.",
--                    dst->reg.dimension, dst->write_mask);
--            break;
-+        if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN)
-+            min_parameter = parameter;
-+        else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX)
-+            max_parameter = parameter;
-     }
--    if (dst->modifiers & ~VKD3DSPDM_MASK)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.",
--                dst->modifiers);
-+    if (!min_parameter && !max_parameter)
-+        return VKD3D_OK;
--    switch (dst->shift)
-+    if (min_parameter && min_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32)
-     {
--        case 0:
--        case 1:
--        case 2:
--        case 3:
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid minimum point size parameter data type %#x.", min_parameter->data_type);
-+    }
-+    if (max_parameter && max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32)
-+    {
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid maximum point size parameter data type %#x.", max_parameter->data_type);
-+    }
-+    /* Replace writes to the point size by inserting a clamp before each write. */
-+    for (size_t i = 0; i < program->instructions.count; ++i)
-+    {
-+        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-+        const struct vkd3d_shader_location *loc;
-+        unsigned int ssa_value;
-+        bool clamp = false;
-+        if (vsir_instruction_is_dcl(ins))
-+            continue;
-+        for (size_t j = 0; j < ins->dst_count; ++j)
-+        {
-+            struct vkd3d_shader_dst_param *dst = &ins->dst[j];
-+            /* Note we run after I/O normalization. */
-+            if (dst->reg.type == VKD3DSPR_RASTOUT)
-+            {
-+                dst_param_init_ssa_float(dst, program->ssa_count);
-+                ssa_value = program->ssa_count++;
-+                clamp = true;
-+            }
-+        }
-+        if (!clamp)
-+            continue;
-+        if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter))
-+            return VKD3D_ERROR_OUT_OF_MEMORY;
-+        loc = &program->instructions.elements[i].location;
-+        ins = &program->instructions.elements[i + 1];
-+        if (min_parameter)
-+        {
-+            vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MAX, 1, 2);
-+            src_param_init_ssa_float(&ins->src[0], ssa_value);
-+            src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, VKD3D_DATA_FLOAT);
-+            if (max_parameter)
-+            {
-+                dst_param_init_ssa_float(&ins->dst[0], program->ssa_count);
-+                ssa_value = program->ssa_count++;
-+            }
-+            else
-+            {
-+                vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
-+                ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
-+            }
-+            ++ins;
-+            ++i;
-+        }
-+        if (max_parameter)
-+        {
-+            vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MIN, 1, 2);
-+            src_param_init_ssa_float(&ins->src[0], ssa_value);
-+            src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_DATA_FLOAT);
-+            vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
-+            ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
-+            ++i;
-+        }
-+    }
-+    return VKD3D_OK;
-+static bool has_texcoord_signature_element(const struct shader_signature *signature)
-+    for (size_t i = 0; i < signature->element_count; ++i)
-+    {
-+        if (!ascii_strcasecmp(signature->elements[i].semantic_name, "TEXCOORD"))
-+            return true;
-+    }
-+    return false;
-+/* Returns true if replacement was done. */
-+static bool replace_texcoord_with_point_coord(struct vsir_program *program,
-+        struct vkd3d_shader_src_param *src, unsigned int coord_temp)
-+    uint32_t prev_swizzle = src->swizzle;
-+    const struct signature_element *e;
-+    /* The input semantic may have a nontrivial mask, which we need to
-+     * correct for. E.g. if the mask is .yz, and we read from .y, that needs
-+     * to become .x. */
-+    static const uint32_t inverse_swizzles[16] =
-+    {
-+        /* Use _ for "undefined" components, for clarity. */
-+        0,
-+        /* .x    */ VKD3D_SHADER_SWIZZLE(X, _, _, _),
-+        /* .y    */ VKD3D_SHADER_SWIZZLE(_, X, _, _),
-+        /* .xy   */ VKD3D_SHADER_SWIZZLE(X, Y, _, _),
-+        /* .z    */ VKD3D_SHADER_SWIZZLE(_, _, X, _),
-+        /* .xz   */ VKD3D_SHADER_SWIZZLE(X, _, Y, _),
-+        /* .yz   */ VKD3D_SHADER_SWIZZLE(_, X, Y, _),
-+        /* .xyz  */ VKD3D_SHADER_SWIZZLE(X, Y, Z, _),
-+        /* .w    */ VKD3D_SHADER_SWIZZLE(_, _, _, X),
-+        /* .xw   */ VKD3D_SHADER_SWIZZLE(X, _, _, Y),
-+        /* .yw   */ VKD3D_SHADER_SWIZZLE(_, X, _, Y),
-+        /* .xyw  */ VKD3D_SHADER_SWIZZLE(X, Y, _, Z),
-+        /* .zw   */ VKD3D_SHADER_SWIZZLE(_, _, X, Y),
-+        /* .xzw  */ VKD3D_SHADER_SWIZZLE(X, _, Y, Z),
-+        /* .yzw  */ VKD3D_SHADER_SWIZZLE(_, X, Y, Z),
-+        /* .xyzw */ VKD3D_SHADER_SWIZZLE(X, Y, Z, W),
-+    };
-+    if (src->reg.type != VKD3DSPR_INPUT)
-+        return false;
-+    e = &program->input_signature.elements[src->reg.idx[0].offset];
-+    if (ascii_strcasecmp(e->semantic_name, "TEXCOORD"))
-+        return false;
-+    src->reg.type = VKD3DSPR_TEMP;
-+    src->reg.idx[0].offset = coord_temp;
-+    /* If the mask is already contiguous and zero-based, no need to remap
-+     * the swizzle. */
-+    if (!(e->mask & (e->mask + 1)))
-+        return true;
-+    src->swizzle = 0;
-+    for (unsigned int i = 0; i < 4; ++i)
-+    {
-+        src->swizzle |= vsir_swizzle_get_component(inverse_swizzles[e->mask],
-+                vsir_swizzle_get_component(prev_swizzle, i)) << VKD3D_SHADER_SWIZZLE_SHIFT(i);
-+    }
-+    return true;
-+static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *program,
-+        struct vsir_transformation_context *ctx)
-+    const struct vkd3d_shader_parameter1 *sprite_parameter = NULL;
-+    static const struct vkd3d_shader_location no_loc;
-+    struct vkd3d_shader_instruction *ins;
-+    bool used_texcoord = false;
-+    unsigned int coord_temp;
-+    size_t i, insert_pos;
-+    if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
-+        return VKD3D_OK;
-+    for (i = 0; i < program->parameter_count; ++i)
-+    {
-+        const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i];
-+        if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE)
-+            sprite_parameter = parameter;
-+    }
-+    if (!sprite_parameter)
-+        return VKD3D_OK;
-+    if (sprite_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT)
-+    {
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
-+                "Unsupported point sprite parameter type %#x.", sprite_parameter->type);
-+    }
-+    if (sprite_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32)
-+    {
-+        vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid point sprite parameter data type %#x.", sprite_parameter->data_type);
-+    }
-+    if (!sprite_parameter->u.immediate_constant.u.u32)
-+        return VKD3D_OK;
-+    if (!has_texcoord_signature_element(&program->input_signature))
-+        return VKD3D_OK;
-+    /* VKD3DSPR_POINTCOORD is a two-component value; fill the remaining two
-+     * components with zeroes. */
-+    coord_temp = program->temp_count++;
-+    /* Construct the new temp after all LABEL, DCL, and NOP instructions.
-+     * We need to skip NOP instructions because they might result from removed
-+     * DCLs, and there could still be DCLs after NOPs. */
-+    for (i = 0; i < program->instructions.count; ++i)
-+    {
-+        ins = &program->instructions.elements[i];
-+        if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP)
-+            break;
-+    }
-+    insert_pos = i;
-+    /* Replace each texcoord read with a read from the point coord. */
-+    for (; i < program->instructions.count; ++i)
-+    {
-+        ins = &program->instructions.elements[i];
-+        if (vsir_instruction_is_dcl(ins))
-+            continue;
-+        for (unsigned int j = 0; j < ins->src_count; ++j)
-+        {
-+            used_texcoord |= replace_texcoord_with_point_coord(program, &ins->src[j], coord_temp);
-+            for (unsigned int k = 0; k < ins->src[j].reg.idx_count; ++k)
-+            {
-+                if (ins->src[j].reg.idx[k].rel_addr)
-+                    used_texcoord |= replace_texcoord_with_point_coord(program,
-+                            ins->src[j].reg.idx[k].rel_addr, coord_temp);
-+            }
-+        }
-+        for (unsigned int j = 0; j < ins->dst_count; ++j)
-+        {
-+            for (unsigned int k = 0; k < ins->dst[j].reg.idx_count; ++k)
-+            {
-+                if (ins->dst[j].reg.idx[k].rel_addr)
-+                    used_texcoord |= replace_texcoord_with_point_coord(program,
-+                            ins->dst[j].reg.idx[k].rel_addr, coord_temp);
-+            }
-+        }
-+    }
-+    if (used_texcoord)
-+    {
-+        if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2))
-+            return VKD3D_ERROR_OUT_OF_MEMORY;
-+        ins = &program->instructions.elements[insert_pos];
-+        vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
-+        dst_param_init_temp_float4(&ins->dst[0], coord_temp);
-+        ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1;
-+        vsir_src_param_init(&ins->src[0], VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0);
-+        ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+        ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE;
-+        ++ins;
-+        vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
-+        dst_param_init_temp_float4(&ins->dst[0], coord_temp);
-+        ins->dst[0].write_mask = VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3;
-+        vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
-+        ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
-+        ++ins;
-+        program->has_point_coord = true;
-+    }
-+    return VKD3D_OK;
-+struct validation_context
-+    struct vkd3d_shader_message_context *message_context;
-+    const struct vsir_program *program;
-+    size_t instruction_idx;
-+    struct vkd3d_shader_location null_location;
-+    bool invalid_instruction_idx;
-+    enum vkd3d_result status;
-+    bool dcl_temps_found;
-+    enum vkd3d_shader_opcode phase;
-+    bool inside_block;
-+    struct validation_context_temp_data
-+    {
-+        enum vsir_dimension dimension;
-+        size_t first_seen;
-+    } *temps;
-+    struct validation_context_ssa_data
-+    {
-+        enum vsir_dimension dimension;
-+        enum vkd3d_data_type data_type;
-+        size_t first_seen;
-+        uint32_t write_mask;
-+        uint32_t read_mask;
-+        size_t first_assigned;
-+    } *ssas;
-+    enum vkd3d_shader_opcode *blocks;
-+    size_t depth;
-+    size_t blocks_capacity;
-+    unsigned int outer_tess_idxs[4];
-+    unsigned int inner_tess_idxs[2];
-+static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx,
-+        enum vkd3d_shader_error error, const char *format, ...)
-+    struct vkd3d_string_buffer buf;
-+    va_list args;
-+    vkd3d_string_buffer_init(&buf);
-+    va_start(args, format);
-+    vkd3d_string_buffer_vprintf(&buf, format, args);
-+    va_end(args);
-+    if (ctx->invalid_instruction_idx)
-+    {
-+        vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer);
-+        WARN("VSIR validation error: %s\n", buf.buffer);
-+    }
-+    else
-+    {
-+        const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx];
-+        vkd3d_shader_error(ctx->message_context, &ins->location, error,
-+                "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer);
-+        WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer);
-+    }
-+    vkd3d_string_buffer_cleanup(&buf);
-+    if (!ctx->status)
-+        ctx->status = VKD3D_ERROR_INVALID_SHADER;
-+static void vsir_validate_register_without_indices(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->idx_count != 0)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a register of type %#x.",
-+                reg->idx_count, reg->type);
-+static void vsir_validate_io_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    const struct shader_signature *signature;
-+    bool has_control_point = false;
-+    switch (reg->type)
-+    {
-+        case VKD3DSPR_INPUT:
-+            signature = &ctx->program->input_signature;
-+            switch (ctx->program->shader_version.type)
-+            {
-+                case VKD3D_SHADER_TYPE_GEOMETRY:
-+                case VKD3D_SHADER_TYPE_HULL:
-+                case VKD3D_SHADER_TYPE_DOMAIN:
-+                    has_control_point = true;
-+                    break;
-+                default:
-+                    break;
-+            }
-+            break;
-+        case VKD3DSPR_OUTPUT:
-+            switch (ctx->program->shader_version.type)
-+            {
-+                case VKD3D_SHADER_TYPE_HULL:
-+                    if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE
-+                            || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO)
-+                    {
-+                        signature = &ctx->program->output_signature;
-+                        has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
-+                    }
-+                    else
-+                    {
-+                        signature = &ctx->program->patch_constant_signature;
-+                    }
-+                    break;
-+                default:
-+                    signature = &ctx->program->output_signature;
-+                    break;
-+            }
-+            break;
-+            signature = &ctx->program->input_signature;
-+            has_control_point = true;
-+            break;
-+            signature = &ctx->program->output_signature;
-+            has_control_point = true;
-+            break;
-+        case VKD3DSPR_PATCHCONST:
-+            signature = &ctx->program->patch_constant_signature;
-+            break;
-+        default:
-+            vkd3d_unreachable();
-+    }
-+    if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO)
-+    {
-+        /* Indices are [register] or [control point, register]. Both are
-+         * allowed to have a relative address. */
-+        unsigned int expected_idx_count = 1 + !!has_control_point;
-+        if (reg->idx_count != expected_idx_count)
-+        {
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                    "Invalid index count %u for a register of type %#x.",
-+                    reg->idx_count, reg->type);
-+            return;
-+        }
-+    }
-+    else
-+    {
-+        struct signature_element *element;
-+        unsigned int expected_idx_count;
-+        unsigned int signature_idx;
-+        bool is_array = false;
-+        /* If the signature element is not an array, indices are
-+         * [signature] or [control point, signature]. If the signature
-+         * element is an array, indices are [array, signature] or
-+         * [control point, array, signature]. In any case `signature' is
-+         * not allowed to have a relative address, while the others are.
-+         */
-+        if (reg->idx_count < 1)
-+        {
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                    "Invalid index count %u for a register of type %#x.",
-+                    reg->idx_count, reg->type);
-+            return;
-+        }
-+        if (reg->idx[reg->idx_count - 1].rel_addr)
-+        {
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                    "Non-NULL relative address for the signature index of a register of type %#x.",
-+                    reg->type);
-+            return;
-+        }
-+        signature_idx = reg->idx[reg->idx_count - 1].offset;
-+        if (signature_idx >= signature->element_count)
-+        {
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                    "Signature index %u exceeds the signature size %u in a register of type %#x.",
-+                    signature_idx, signature->element_count, reg->type);
-+            return;
-+        }
-+        element = &signature->elements[signature_idx];
-+        if (element->register_count > 1 || vsir_sysval_semantic_is_tess_factor(element->sysval_semantic))
-+            is_array = true;
-+        expected_idx_count = 1 + !!has_control_point + !!is_array;
-+        if (reg->idx_count != expected_idx_count)
-+        {
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                    "Invalid index count %u for a register of type %#x.",
-+                    reg->idx_count, reg->type);
-+            return;
-+        }
-+    }
-+static void vsir_validate_temp_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    struct validation_context_temp_data *data;
-+    if (reg->idx_count != 1)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a TEMP register.",
-+                reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for a TEMP register.");
-+    if (reg->idx[0].offset >= ctx->program->temp_count)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "TEMP register index %u exceeds the maximum count %u.",
-+                reg->idx[0].offset, ctx->program->temp_count);
-+        return;
-+    }
-+    data = &ctx->temps[reg->idx[0].offset];
-+    if (reg->dimension == VSIR_DIMENSION_NONE)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension NONE for a TEMP register.");
-+        return;
-+    }
-+    /* TEMP registers can be scalar or vec4, provided that
-+     * each individual register always appears with the same
-+     * dimension. */
-+    if (data->dimension == VSIR_DIMENSION_NONE)
-+    {
-+        data->dimension = reg->dimension;
-+        data->first_seen = ctx->instruction_idx;
-+    }
-+    else if (data->dimension != reg->dimension)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension %#x for a TEMP register: "
-+                "it has already been seen with dimension %#x at instruction %zu.",
-+                reg->dimension, data->dimension, data->first_seen);
-+    }
-+static void vsir_validate_rastout_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->idx_count != 1)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a RASTOUT register.",
-+                reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for a RASTOUT register.");
-+    if (reg->idx[0].offset >= 3)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Invalid offset for a RASTOUT register.");
-+static void vsir_validate_misctype_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->idx_count != 1)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a MISCTYPE register.",
-+                reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for a MISCTYPE register.");
-+    if (reg->idx[0].offset >= 2)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Invalid offset for a MISCTYPE register.");
-+static void vsir_validate_label_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION,
-+                "Invalid precision %#x for a LABEL register.", reg->precision);
-+    if (reg->data_type != VKD3D_DATA_UNUSED)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid data type %#x for a LABEL register.", reg->data_type);
-+    if (reg->dimension != VSIR_DIMENSION_NONE)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension %#x for a LABEL register.", reg->dimension);
-+    if (reg->idx_count != 1)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a LABEL register.", reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for a LABEL register.");
-+    /* Index == 0 is invalid, but it is temporarily allowed
-+     * for intermediate stages. Once we support validation
-+     * dialects we can selectively check for that. */
-+    if (reg->idx[0].offset > ctx->program->block_count)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "LABEL register index %u exceeds the maximum count %u.",
-+                reg->idx[0].offset, ctx->program->block_count);
-+static void vsir_validate_sampler_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION,
-+                "Invalid precision %#x for a SAMPLER register.", reg->precision);
-+    if (reg->data_type != VKD3D_DATA_UNUSED)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid data type %#x for a SAMPLER register.", reg->data_type);
-+    /* VEC4 is allowed in gather operations. */
-+    if (reg->dimension == VSIR_DIMENSION_SCALAR)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension SCALAR for a SAMPLER register.");
-+    if (reg->idx_count != 2)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a SAMPLER register.", reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for the descriptor index of a SAMPLER register.");
-+static void vsir_validate_resource_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION,
-+                "Invalid precision %#x for a RESOURCE register.", reg->precision);
-+    if (reg->data_type != VKD3D_DATA_UNUSED)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid data type %#x for a RESOURCE register.", reg->data_type);
-+    if (reg->dimension != VSIR_DIMENSION_VEC4)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension %#x for a RESOURCE register.", reg->dimension);
-+    if (reg->idx_count != 2)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a RESOURCE register.", reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for the descriptor index of a RESOURCE register.");
-+static void vsir_validate_uav_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION,
-+                "Invalid precision %#x for a UAV register.",
-+                reg->precision);
-+    if (reg->data_type != VKD3D_DATA_UNUSED)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                "Invalid data type %#x for a UAV register.",
-+                reg->data_type);
-+    /* NONE is allowed in counter operations. */
-+    if (reg->dimension == VSIR_DIMENSION_SCALAR)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension %#x for a UAV register.",
-+                reg->dimension);
-+    if (reg->idx_count != 2)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a UAV register.",
-+                reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for the descriptor index of a UAV register.");
-+static void vsir_validate_ssa_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    struct validation_context_ssa_data *data;
-+    if (reg->idx_count != 1)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
-+                "Invalid index count %u for a SSA register.",
-+                reg->idx_count);
-+        return;
-+    }
-+    if (reg->idx[0].rel_addr)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "Non-NULL relative address for a SSA register.");
-+    if (reg->idx[0].offset >= ctx->program->ssa_count)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
-+                "SSA register index %u exceeds the maximum count %u.",
-+                reg->idx[0].offset, ctx->program->ssa_count);
-+        return;
-+    }
-+    data = &ctx->ssas[reg->idx[0].offset];
-+    if (reg->dimension == VSIR_DIMENSION_NONE)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid dimension NONE for a SSA register.");
-+        return;
-+    }
-+    /* SSA registers can be scalar or vec4, provided that each
-+     * individual register always appears with the same
-+     * dimension. */
-+    if (data->dimension == VSIR_DIMENSION_NONE)
-+    {
-+        data->dimension = reg->dimension;
-+        data->data_type = reg->data_type;
-+        data->first_seen = ctx->instruction_idx;
-+    }
-+    else
-+    {
-+        if (data->dimension != reg->dimension)
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                    "Invalid dimension %#x for a SSA register: "
-+                    "it has already been seen with dimension %#x at instruction %zu.",
-+                    reg->dimension, data->dimension, data->first_seen);
-+        if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type))
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
-+                    "Invalid data type %#x for a SSA register: "
-+                    "it has already been seen with data type %#x at instruction %zu.",
-+                    reg->data_type, data->data_type, data->first_seen);
-+    }
-+static void vsir_validate_src_param(struct validation_context *ctx,
-+        const struct vkd3d_shader_src_param *src);
-+static void vsir_validate_register(struct validation_context *ctx,
-+        const struct vkd3d_shader_register *reg)
-+    unsigned int i;
-+    if (reg->type >= VKD3DSPR_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.",
-+                reg->type);
-+    if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.",
-+                reg->precision);
-+    if (reg->data_type >= VKD3D_DATA_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.",
-+                reg->data_type);
-+    if (reg->dimension >= VSIR_DIMENSION_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.",
-+                reg->dimension);
-+    if (reg->idx_count > ARRAY_SIZE(reg->idx))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.",
-+                reg->idx_count);
-+    for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i)
-+    {
-+        const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr;
-+        if (reg->idx[i].rel_addr)
-+            vsir_validate_src_param(ctx, param);
-+    }
-+    switch (reg->type)
-+    {
-+        case VKD3DSPR_TEMP:
-+            vsir_validate_temp_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_INPUT:
-+            vsir_validate_io_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_RASTOUT:
-+            vsir_validate_rastout_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_OUTPUT:
-+            vsir_validate_io_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_DEPTHOUT:
-+            vsir_validate_register_without_indices(ctx, reg);
-+            break;
-+        case VKD3DSPR_MISCTYPE:
-+            vsir_validate_misctype_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_LABEL:
-+            vsir_validate_label_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_IMMCONST:
-+            vsir_validate_register_without_indices(ctx, reg);
-+            break;
-+        case VKD3DSPR_IMMCONST64:
-+            vsir_validate_register_without_indices(ctx, reg);
-+            break;
-+        case VKD3DSPR_NULL:
-+            vsir_validate_register_without_indices(ctx, reg);
-+            break;
-+        case VKD3DSPR_SAMPLER:
-+            vsir_validate_sampler_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_RESOURCE:
-+            vsir_validate_resource_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_UAV:
-+            vsir_validate_uav_register(ctx, reg);
-+            break;
-+            vsir_validate_io_register(ctx, reg);
-+            break;
-+            vsir_validate_io_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_PATCHCONST:
-+            vsir_validate_io_register(ctx, reg);
-+            break;
-+        case VKD3DSPR_DEPTHOUTGE:
-+            vsir_validate_register_without_indices(ctx, reg);
-+            break;
-+        case VKD3DSPR_DEPTHOUTLE:
-+            vsir_validate_register_without_indices(ctx, reg);
-+            break;
-+        case VKD3DSPR_SSA:
-+            vsir_validate_ssa_register(ctx, reg);
-+            break;
-+        default:
-+            break;
-+    }
-+static void vsir_validate_dst_param(struct validation_context *ctx,
-+        const struct vkd3d_shader_dst_param *dst)
-+    vsir_validate_register(ctx, &dst->reg);
-+    if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.",
-+                dst->write_mask);
-+    switch (dst->reg.dimension)
-+    {
-+            if (dst->write_mask != VKD3DSP_WRITEMASK_0)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.",
-+                    dst->write_mask);
-+            break;
-+        case VSIR_DIMENSION_VEC4:
-+            if (dst->write_mask == 0)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask.");
-+            break;
-+        default:
-+            if (dst->write_mask != 0)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.",
-+                    dst->reg.dimension, dst->write_mask);
-+            break;
-+    }
-+    if (dst->modifiers & ~VKD3DSPDM_MASK)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.",
-+                dst->modifiers);
-+    switch (dst->shift)
-+    {
-+        case 0:
-+        case 1:
-+        case 2:
-+        case 3:
-         case 13:
-         case 14:
-         case 15:
-             break;
-         default:
--            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.",
--                    dst->shift);
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.",
-+                    dst->shift);
-+    }
-+    switch (dst->reg.type)
-+    {
-+        case VKD3DSPR_SSA:
-+            if (dst->reg.idx[0].offset < ctx->program->ssa_count)
-+            {
-+                struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset];
-+                if (data->write_mask == 0)
-+                {
-+                    data->write_mask = dst->write_mask;
-+                    data->first_assigned = ctx->instruction_idx;
-+                }
-+                else
-+                {
-+                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE,
-+                            "SSA register is already assigned at instruction %zu.",
-+                            data->first_assigned);
-+                }
-+            }
-+            break;
-+        case VKD3DSPR_IMMCONST:
-+        case VKD3DSPR_IMMCONST64:
-+        case VKD3DSPR_SAMPLER:
-+        case VKD3DSPR_RESOURCE:
-+        case VKD3DSPR_INPUT:
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                    "Invalid %#x register used as destination parameter.", dst->reg.type);
-+            break;
-+        case VKD3DSPR_PATCHCONST:
-+            if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                        "PATCHCONST register used as destination parameters are only allowed in Hull Shaders.");
-+            break;
-+        default:
-+            break;
-+    }
-+static void vsir_validate_src_param(struct validation_context *ctx,
-+        const struct vkd3d_shader_src_param *src)
-+    vsir_validate_register(ctx, &src->reg);
-+    if (src->swizzle & ~0x03030303u)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.",
-+                src->swizzle);
-+    if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.",
-+                src->reg.dimension, src->swizzle);
-+    if (src->modifiers >= VKD3DSPSM_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.",
-+                src->modifiers);
-+    switch (src->reg.type)
-+    {
-+        case VKD3DSPR_SSA:
-+            if (src->reg.idx[0].offset < ctx->program->ssa_count)
-+            {
-+                struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset];
-+                unsigned int i;
-+                for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
-+                    data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i));
-+            }
-+            break;
-+        case VKD3DSPR_NULL:
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                    "Invalid NULL register used as source parameter.");
-+            break;
-+        case VKD3DSPR_OUTPUT:
-+            if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL
-+                    || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE))
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                        "Invalid OUTPUT register used as source parameter.");
-+            break;
-+        case VKD3DSPR_PATCHCONST:
-+            if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN
-+                    && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                        "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders.");
-+            break;
-+        default:
-+            break;
-+    }
-+static void vsir_validate_dst_count(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+    if (instruction->dst_count != count)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT,
-+                "Invalid destination count %u for an instruction of type %#x, expected %u.",
-+                        instruction->dst_count, instruction->opcode, count);
-+static void vsir_validate_src_count(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+    if (instruction->src_count != count)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
-+                "Invalid source count %u for an instruction of type %#x, expected %u.",
-+                instruction->src_count, instruction->opcode, count);
-+static bool vsir_validate_src_min_count(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+    if (instruction->src_count < count)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
-+                "Invalid source count %u for an instruction of type %#x, expected at least %u.",
-+                instruction->src_count, instruction->opcode, count);
-+        return false;
-+    }
-+    return true;
-+static bool vsir_validate_src_max_count(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+    if (instruction->src_count > count)
-+    {
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
-+                "Invalid source count %u for an instruction of type %#x, expected at most %u.",
-+                instruction->src_count, instruction->opcode, count);
-+        return false;
-+    }
-+    return true;
-+enum vsir_signature_type
-+static const char * const signature_type_names[] =
-+    [SIGNATURE_TYPE_INPUT] = "input",
-+    [SIGNATURE_TYPE_OUTPUT] = "output",
-+    [SIGNATURE_TYPE_PATCH_CONSTANT] = "patch constant",
-+#define PS_BIT (1u << VKD3D_SHADER_TYPE_PIXEL)
-+#define HS_BIT (1u << VKD3D_SHADER_TYPE_HULL)
-+static const struct sysval_validation_data_element
-+    unsigned int input;
-+    unsigned int output;
-+    unsigned int patch_constant;
-+    enum vkd3d_shader_component_type data_type;
-+    unsigned int component_count;
-+sysval_validation_data[] =
-+static void vsir_validate_signature_element(struct validation_context *ctx,
-+        const struct shader_signature *signature, enum vsir_signature_type signature_type,
-+        unsigned int idx)
-+    enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID;
-+    const char *signature_type_name = signature_type_names[signature_type];
-+    const struct signature_element *element = &signature->elements[idx];
-+    bool integer_type = false, is_outer = false;
-+    unsigned int semantic_index_max = 0;
-+    if (element->register_count == 0)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Invalid zero register count.", idx, signature_type_name);
-+    if (element->mask == 0 || (element->mask & ~0xf))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask);
-+    if (!vkd3d_bitmask_is_contiguous(element->mask))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Non-contiguous mask %#x.",
-+                idx, signature_type_name, element->mask);
-+    /* Here we'd likely want to validate that the usage mask is a subset of the
-+     * signature mask. Unfortunately the D3DBC parser sometimes violates this.
-+     * For example I've seen a shader like this:
-+     *   ps_3_0
-+     *   [...]
-+     *   dcl_texcoord0 v0
-+     *   [...]
-+     *   texld r2.xyzw, v0.xyzw, s1.xyzw
-+     *   [...]
-+     *
-+     * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to
-+     * compute the signature mask, but the texld instruction apparently uses all
-+     * the components. Of course the last two components are ignored, but
-+     * formally they seem to be used. So we end up with a signature element with
-+     * mask .xy and usage mask .xyzw.
-+     *
-+     * The correct fix would probably be to make the D3DBC parser aware of which
-+     * components are really used for each instruction, but that would take some
-+     * time. */
-+    if (element->used_mask & ~0xf)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Invalid usage mask %#x.",
-+                idx, signature_type_name, element->used_mask);
-+    switch (element->sysval_semantic)
-+    {
-+        case VKD3D_SHADER_SV_NONE:
-+        case VKD3D_SHADER_SV_POSITION:
-+        case VKD3D_SHADER_SV_VERTEX_ID:
-+        case VKD3D_SHADER_SV_TARGET:
-+        case VKD3D_SHADER_SV_DEPTH:
-+        case VKD3D_SHADER_SV_COVERAGE:
-+            break;
-+            expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD;
-+            semantic_index_max = 4;
-+            is_outer = true;
-+            break;
-+            expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD;
-+            semantic_index_max = 2;
-+            is_outer = false;
-+            break;
-+            expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE;
-+            semantic_index_max = 3;
-+            is_outer = true;
-+            break;
-+            expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE;
-+            semantic_index_max = 1;
-+            is_outer = false;
-+            break;
-+            expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE;
-+            semantic_index_max = 2;
-+            is_outer = true;
-+            break;
-+        default:
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                    "element %u of %s signature: Invalid system value semantic %#x.",
-+                    idx, signature_type_name, element->sysval_semantic);
-+            break;
-     }
--    switch (dst->reg.type)
-+    if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID)
-     {
--        case VKD3DSPR_SSA:
--            if (dst->reg.idx[0].offset < ctx->program->ssa_count)
-+        if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT)
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                    "element %u of %s signature: System value semantic %#x is only valid "
-+                    "in the patch constant signature.",
-+                    idx, signature_type_name, element->sysval_semantic);
-+        if (ctx->program->tess_domain != expected_tess_domain)
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                    "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.",
-+                    idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain);
-+        if (element->semantic_index >= semantic_index_max)
-+        {
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                    "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.",
-+                    idx, signature_type_name, element->semantic_index, element->sysval_semantic);
-+        }
-+        else
-+        {
-+            unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index];
-+            if (*idx_pos != ~0u)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.",
-+                        idx, signature_type_name, element->semantic_index, element->sysval_semantic);
-+            else
-+                *idx_pos = idx;
-+        }
-+    }
-+    if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data))
-+    {
-+        const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic];
-+        if (data->input || data->output || data->patch_constant)
-+        {
-+            unsigned int mask;
-+            switch (signature_type)
-             {
--                struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset];
-+                case SIGNATURE_TYPE_INPUT:
-+                    mask = data->input;
-+                    break;
--                if (data->write_mask == 0)
--                {
--                    data->write_mask = dst->write_mask;
--                    data->first_assigned = ctx->instruction_idx;
--                }
--                else
--                {
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE,
--                            "SSA register is already assigned at instruction %zu.",
--                            data->first_assigned);
--                }
-+                case SIGNATURE_TYPE_OUTPUT:
-+                    mask = data->output;
-+                    break;
-+                case SIGNATURE_TYPE_PATCH_CONSTANT:
-+                    mask = data->patch_constant;
-+                    break;
-+                default:
-+                    vkd3d_unreachable();
-             }
--            break;
--        case VKD3DSPR_IMMCONST:
--            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                    "Invalid IMMCONST register used as destination parameter.");
-+            if (!(mask & (1u << ctx->program->shader_version.type)))
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "element %u of %s signature: Invalid system value semantic %#x.",
-+                        idx, signature_type_name, element->sysval_semantic);
-+        }
-+        if (data->component_count != 0)
-+        {
-+            if (element->component_type != data->data_type)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "element %u of %s signature: Invalid data type %#x for system value semantic %#x.",
-+                        idx, signature_type_name, element->component_type, element->sysval_semantic);
-+            if (vsir_write_mask_component_count(element->mask) > data->component_count)
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "element %u of %s signature: Invalid mask %#x for system value semantic %#x.",
-+                        idx, signature_type_name, element->mask, element->sysval_semantic);
-+        }
-+    }
-+    switch (element->component_type)
-+    {
-+            integer_type = true;
-             break;
--        case VKD3DSPR_IMMCONST64:
--            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                    "Invalid IMMCONST64 register used as destination parameter.");
-             break;
-         default:
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                    "element %u of %s signature: Invalid component type %#x.",
-+                    idx, signature_type_name, element->component_type);
-             break;
-     }
-+    if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Invalid minimum precision %#x.",
-+                idx, signature_type_name, element->min_precision);
-+    if (element->interpolation_mode >= VKD3DSIM_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Invalid interpolation mode %#x.",
-+                idx, signature_type_name, element->interpolation_mode);
-+    if (integer_type && element->interpolation_mode != VKD3DSIM_NONE
-+            && element->interpolation_mode != VKD3DSIM_CONSTANT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "element %u of %s signature: Invalid interpolation mode %#x for integer component type.",
-+                idx, signature_type_name, element->interpolation_mode);
- }
--static void vsir_validate_src_param(struct validation_context *ctx,
--        const struct vkd3d_shader_src_param *src)
-+static const unsigned int allowed_signature_phases[] =
- {
--    vsir_validate_register(ctx, &src->reg);
--    if (src->swizzle & ~0x03030303u)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.",
--                src->swizzle);
-+static void vsir_validate_signature(struct validation_context *ctx,
-+        const struct shader_signature *signature, enum vsir_signature_type signature_type)
-+    unsigned int i;
--    if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.",
--                src->reg.dimension, src->swizzle);
-+    if (signature->element_count != 0 && !(allowed_signature_phases[signature_type]
-+            & (1u << ctx->program->shader_version.type)))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                "Unexpected %s signature.", signature_type_names[signature_type]);
--    if (src->modifiers >= VKD3DSPSM_COUNT)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.",
--                src->modifiers);
-+    for (i = 0; i < signature->element_count; ++i)
-+        vsir_validate_signature_element(ctx, signature, signature_type, i);
--    switch (src->reg.type)
-+static const char *name_from_cf_type(enum vsir_control_flow_type type)
-+    switch (type)
-     {
--        case VKD3DSPR_SSA:
--            if (src->reg.idx[0].offset < ctx->program->ssa_count)
--            {
--                struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset];
--                unsigned int i;
-+        case VSIR_CF_STRUCTURED:
-+            return "structured";
-+        case VSIR_CF_BLOCKS:
-+            return "block-based";
-+        default:
-+            vkd3d_unreachable();
-+    }
-+static void vsir_validate_cf_type(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction, enum vsir_control_flow_type expected_type)
-+    if (ctx->program->cf_type != expected_type)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.",
-+                instruction->opcode, name_from_cf_type(ctx->program->cf_type));
-+static void vsir_validator_push_block(struct validation_context *ctx, enum vkd3d_shader_opcode opcode)
-+    if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
-+    {
-+        ctx->status = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+    ctx->blocks[ctx->depth++] = opcode;
-+static void vsir_validate_hull_shader_phase(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
-+                "Phase instruction %#x is only valid in a hull shader.",
-+                instruction->opcode);
-+    if (ctx->depth != 0)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "Phase instruction %#x must appear to top level.",
-+                instruction->opcode);
-+    ctx->phase = instruction->opcode;
-+    ctx->dcl_temps_found = false;
-+static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    size_t i;
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
-+    vsir_validate_dst_count(ctx, instruction, 0);
-+    if (!vsir_validate_src_min_count(ctx, instruction, 1))
-+        return;
-+    if (vsir_register_is_label(&instruction->src[0].reg))
-+    {
-+        /* Unconditional branch: parameters are jump label,
-+         * optional merge label, optional continue label. */
-+        vsir_validate_src_max_count(ctx, instruction, 3);
-+        for (i = 0; i < instruction->src_count; ++i)
-+        {
-+            if (!vsir_register_is_label(&instruction->src[i].reg))
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                        "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.",
-+                        instruction->src[i].reg.type);
-+        }
-+    }
-+    else
-+    {
-+        /* Conditional branch: parameters are condition, true
-+         * jump label, false jump label, optional merge label,
-+         * optional continue label. */
-+        vsir_validate_src_min_count(ctx, instruction, 3);
-+        vsir_validate_src_max_count(ctx, instruction, 5);
-+        for (i = 1; i < instruction->src_count; ++i)
-+        {
-+            if (!vsir_register_is_label(&instruction->src[i].reg))
-+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                        "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.",
-+                        instruction->src[i].reg.type);
-+        }
-+    }
-+    ctx->inside_block = false;
-+static void vsir_validate_dcl_gs_instances(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (!instruction->declaration.count || instruction->declaration.count > 32)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.",
-+                instruction->declaration.count);
-+static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    /* Exclude non-finite values. */
-+    if (!(instruction->declaration.max_tessellation_factor >= 1.0f
-+            && instruction->declaration.max_tessellation_factor <= 64.0f))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                "Max tessellation factor %f is invalid.",
-+                instruction->declaration.max_tessellation_factor);
-+static void vsir_validate_dcl_input_primitive(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED
-+            || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.",
-+                instruction->declaration.primitive_type.type);
-+static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (!instruction->declaration.count || instruction->declaration.count > 32)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                "Output control point count %u is invalid.",
-+                instruction->declaration.count);
-+static void vsir_validate_dcl_output_topology(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED
-+            || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.",
-+                instruction->declaration.primitive_type.type);
-+static void vsir_validate_dcl_temps(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (ctx->dcl_temps_found)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS,
-+                "Duplicate DCL_TEMPS instruction.");
-+    if (instruction->declaration.count > ctx->program->temp_count)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS,
-+                "Invalid DCL_TEMPS count %u, expected at most %u.",
-+                instruction->declaration.count, ctx->program->temp_count);
-+    ctx->dcl_temps_found = true;
-+static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID
-+            || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain);
-+    if (instruction->declaration.tessellator_domain != ctx->program->tess_domain)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.",
-+                instruction->declaration.tessellator_domain, ctx->program->tess_domain);
-+static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (!instruction->declaration.tessellator_output_primitive
-+            || instruction->declaration.tessellator_output_primitive
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                "Tessellator output primitive %#x is invalid.",
-+                instruction->declaration.tessellator_output_primitive);
-+static void vsir_validate_dcl_tessellator_partitioning(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (!instruction->declaration.tessellator_partitioning
-+            || instruction->declaration.tessellator_partitioning
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                "Tessellator partitioning %#x is invalid.",
-+                instruction->declaration.tessellator_partitioning);
-+static void vsir_validate_dcl_vertices_out(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
-+    if (instruction->declaration.count > 1024)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.",
-+                instruction->declaration.count);
-+static void vsir_validate_else(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "ELSE instruction doesn't terminate IF block.");
-+    else
-+        ctx->blocks[ctx->depth - 1] = VKD3DSIH_ELSE;
-+static void vsir_validate_endif(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF
-+            && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "ENDIF instruction doesn't terminate IF/ELSE block.");
-+    else
-+        --ctx->depth;
-+static void vsir_validate_endloop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "ENDLOOP instruction doesn't terminate LOOP block.");
-+    else
-+        --ctx->depth;
-+static void vsir_validate_endrep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "ENDREP instruction doesn't terminate REP block.");
-+    else
-+        --ctx->depth;
-+static void vsir_validate_endswitch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "ENDSWITCH instruction doesn't terminate SWITCH block.");
-+    else
-+        --ctx->depth;
-+static void vsir_validate_if(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    vsir_validator_push_block(ctx, VKD3DSIH_IF);
-+static void vsir_validate_ifc(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    vsir_validator_push_block(ctx, VKD3DSIH_IF);
-+static void vsir_validate_label(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
-+    if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                "Invalid register of type %#x in a LABEL instruction, expected LABEL.",
-+                instruction->src[0].reg.type);
-+    if (ctx->inside_block)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-+                "Invalid LABEL instruction inside a block.");
-+    ctx->inside_block = true;
-+static void vsir_validate_loop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    vsir_validate_src_count(ctx, instruction, ctx->program->shader_version.major <= 3 ? 2 : 0);
-+    vsir_validator_push_block(ctx, VKD3DSIH_LOOP);
-+static void vsir_validate_nop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
-+    unsigned int i, incoming_count;
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
-+    vsir_validate_src_min_count(ctx, instruction, 2);
-+    if (instruction->src_count % 2 != 0)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
-+                "Invalid source count %u for a PHI instruction, it must be an even number.",
-+                instruction->src_count);
-+    incoming_count = instruction->src_count / 2;
-+    for (i = 0; i < incoming_count; ++i)
-+    {
-+        unsigned int value_idx = 2 * i;
-+        unsigned int label_idx = 2 * i + 1;
-+        if (!register_is_constant_or_undef(&instruction->src[value_idx].reg)
-+                && !register_is_ssa(&instruction->src[value_idx].reg))
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                    "Invalid value register for incoming %u of type %#x in PHI instruction, "
-+                    "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
-+        if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR)
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                    "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.",
-+                    instruction->src[value_idx].reg.dimension, i);
-+        if (!vsir_register_is_label(&instruction->src[label_idx].reg))
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                    "Invalid label register for case %u of type %#x in PHI instruction, "
-+                    "expected LABEL.", i, instruction->src[value_idx].reg.type);
-+    }
-+    if (instruction->dst_count < 1)
-+        return;
-+    if (!register_is_ssa(&instruction->dst[0].reg))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                "Invalid destination of type %#x in PHI instruction, expected SSA.",
-+                instruction->dst[0].reg.type);
-+    if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
-+                "Invalid destination dimension %#x in PHI instruction, expected scalar.",
-+                instruction->dst[0].reg.dimension);
--                for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
--                    data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i));
--            }
--            break;
-+    if (instruction->dst[0].modifiers != VKD3DSPDM_NONE)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS,
-+                "Invalid modifiers %#x for the destination of a PHI instruction, expected none.",
-+                instruction->dst[0].modifiers);
--        default:
--            break;
--    }
-+    if (instruction->dst[0].shift != 0)
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT,
-+                "Invalid shift %#x for the destination of a PHI instruction, expected none.",
-+                instruction->dst[0].shift);
- }
--static void vsir_validate_dst_count(struct validation_context *ctx,
--        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+static void vsir_validate_rep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
- {
--    if (instruction->dst_count != count)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT,
--                "Invalid destination count %u for an instruction of type %#x, expected %u.",
--                        instruction->dst_count, instruction->opcode, count);
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    vsir_validator_push_block(ctx, VKD3DSIH_REP);
- }
--static void vsir_validate_src_count(struct validation_context *ctx,
--        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+static void vsir_validate_ret(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
- {
--    if (instruction->src_count != count)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
--                "Invalid source count %u for an instruction of type %#x, expected %u.",
--                instruction->src_count, instruction->opcode, count);
-+    ctx->inside_block = false;
- }
--static bool vsir_validate_src_min_count(struct validation_context *ctx,
--        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+static void vsir_validate_switch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
- {
--    if (instruction->src_count < count)
--    {
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
--                "Invalid source count %u for an instruction of type %#x, expected at least %u.",
--                instruction->src_count, instruction->opcode, count);
--        return false;
--    }
--    return true;
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
-+    vsir_validator_push_block(ctx, VKD3DSIH_SWITCH);
- }
--static bool vsir_validate_src_max_count(struct validation_context *ctx,
--        const struct vkd3d_shader_instruction *instruction, unsigned int count)
-+static void vsir_validate_switch_monolithic(struct validation_context *ctx,
-+        const struct vkd3d_shader_instruction *instruction)
- {
--    if (instruction->src_count > count)
--    {
-+    unsigned int i, case_count;
-+    vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
-+    /* Parameters are source, default label, merge label and
-+     * then pairs of constant value and case label. */
-+    if (!vsir_validate_src_min_count(ctx, instruction, 3))
-+        return;
-+    if (instruction->src_count % 2 != 1)
-         validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
--                "Invalid source count %u for an instruction of type %#x, expected at most %u.",
--                instruction->src_count, instruction->opcode, count);
--        return false;
--    }
-+                "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.",
-+                instruction->src_count);
--    return true;
-+    if (!vsir_register_is_label(&instruction->src[1].reg))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
-+                instruction->src[1].reg.type);
--static const char *name_from_cf_type(enum cf_type type)
--    switch (type)
-+    if (!vsir_register_is_label(&instruction->src[2].reg))
-+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
-+                instruction->src[2].reg.type);
-+    case_count = (instruction->src_count - 3) / 2;
-+    for (i = 0; i < case_count; ++i)
-     {
--        case CF_TYPE_STRUCTURED:
--            return "structured";
--        case CF_TYPE_BLOCKS:
--            return "block-based";
--        default:
--            vkd3d_unreachable();
-+        unsigned int value_idx = 3 + 2 * i;
-+        unsigned int label_idx = 3 + 2 * i + 1;
-+        if (!register_is_constant(&instruction->src[value_idx].reg))
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                    "Invalid value register for case %u of type %#x in monolithic SWITCH instruction, "
-+                    "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
-+        if (!vsir_register_is_label(&instruction->src[label_idx].reg))
-+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
-+                    "Invalid label register for case %u of type %#x in monolithic SWITCH instruction, "
-+                    "expected LABEL.", i, instruction->src[value_idx].reg.type);
-     }
-+    ctx->inside_block = false;
- }
--static void vsir_validate_cf_type(struct validation_context *ctx,
--        const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type)
-+struct vsir_validator_instruction_desc
- {
--    VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN);
--    VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN);
--    if (ctx->cf_type != expected_type)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.",
--                instruction->opcode, name_from_cf_type(ctx->cf_type));
-+    unsigned int dst_param_count;
-+    unsigned int src_param_count;
-+    void (*validate)(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction);
-+static const struct vsir_validator_instruction_desc vsir_validator_instructions[] =
-+    [VKD3DSIH_BRANCH] =                           {0, ~0u, vsir_validate_branch},
-+    [VKD3DSIH_HS_CONTROL_POINT_PHASE] =           {0,   0, vsir_validate_hull_shader_phase},
-+    [VKD3DSIH_HS_DECLS] =                         {0,   0, vsir_validate_hull_shader_phase},
-+    [VKD3DSIH_HS_FORK_PHASE] =                    {0,   0, vsir_validate_hull_shader_phase},
-+    [VKD3DSIH_HS_JOIN_PHASE] =                    {0,   0, vsir_validate_hull_shader_phase},
-+    [VKD3DSIH_DCL_GS_INSTANCES] =                 {0,   0, vsir_validate_dcl_gs_instances},
-+    [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] =            {0,   0, vsir_validate_dcl_hs_max_tessfactor},
-+    [VKD3DSIH_DCL_INPUT_PRIMITIVE] =              {0,   0, vsir_validate_dcl_input_primitive},
-+    [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] =   {0,   0, vsir_validate_dcl_output_control_point_count},
-+    [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] =              {0,   0, vsir_validate_dcl_output_topology},
-+    [VKD3DSIH_DCL_TEMPS] =                        {0,   0, vsir_validate_dcl_temps},
-+    [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] =           {0,   0, vsir_validate_dcl_tessellator_domain},
-+    [VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE] = {0,   0, vsir_validate_dcl_tessellator_output_primitive},
-+    [VKD3DSIH_DCL_TESSELLATOR_PARTITIONING] =     {0,   0, vsir_validate_dcl_tessellator_partitioning},
-+    [VKD3DSIH_DCL_VERTICES_OUT] =                 {0,   0, vsir_validate_dcl_vertices_out},
-+    [VKD3DSIH_ELSE] =                             {0,   0, vsir_validate_else},
-+    [VKD3DSIH_ENDIF] =                            {0,   0, vsir_validate_endif},
-+    [VKD3DSIH_ENDLOOP] =                          {0,   0, vsir_validate_endloop},
-+    [VKD3DSIH_ENDREP] =                           {0,   0, vsir_validate_endrep},
-+    [VKD3DSIH_ENDSWITCH] =                        {0,   0, vsir_validate_endswitch},
-+    [VKD3DSIH_IF] =                               {0,   1, vsir_validate_if},
-+    [VKD3DSIH_IFC] =                              {0,   2, vsir_validate_ifc},
-+    [VKD3DSIH_LABEL] =                            {0,   1, vsir_validate_label},
-+    [VKD3DSIH_LOOP] =                             {0, ~0u, vsir_validate_loop},
-+    [VKD3DSIH_NOP] =                              {0,   0, vsir_validate_nop},
-+    [VKD3DSIH_PHI] =                              {1, ~0u, vsir_validate_phi},
-+    [VKD3DSIH_REP] =                              {0,   1, vsir_validate_rep},
-+    [VKD3DSIH_RET] =                              {0,   0, vsir_validate_ret},
-+    [VKD3DSIH_SWITCH] =                           {0,   1, vsir_validate_switch},
-+    [VKD3DSIH_SWITCH_MONOLITHIC] =                {0, ~0u, vsir_validate_switch_monolithic},
- static void vsir_validate_instruction(struct validation_context *ctx)
- {
-@@ -6148,136 +8176,40 @@ static void vsir_validate_instruction(struct validation_context *ctx)
-                 instruction->opcode);
-     }
--    switch (instruction->opcode)
-+    if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID)
-     {
--        case VKD3DSIH_HS_DECLS:
--        case VKD3DSIH_HS_FORK_PHASE:
--        case VKD3DSIH_HS_JOIN_PHASE:
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (version->type != VKD3D_SHADER_TYPE_HULL)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
--                        "Phase instruction %#x is only valid in a hull shader.",
--                        instruction->opcode);
--            if (ctx->depth != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
--                        "Phase instruction %#x must appear to top level.",
--                        instruction->opcode);
--            ctx->phase = instruction->opcode;
--            ctx->dcl_temps_found = false;
--            return;
--            /* Exclude non-finite values. */
--            if (!(instruction->declaration.max_tessellation_factor >= 1.0f
--                    && instruction->declaration.max_tessellation_factor <= 64.0f))
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.",
--                        instruction->declaration.max_tessellation_factor);
--            return;
--            if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED
--                    || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.",
--                        instruction->declaration.primitive_type.type);
--            return;
--            if (instruction->declaration.count > 1024)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.",
--                        instruction->declaration.count);
--            return;
--            if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED
--                    || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.",
--                        instruction->declaration.primitive_type.type);
--            return;
--            if (!instruction->declaration.count || instruction->declaration.count > 32)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.",
--                        instruction->declaration.count);
--            return;
--            if (!instruction->declaration.count || instruction->declaration.count > 32)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.",
--                        instruction->declaration.count);
--            return;
--            if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID
--                    || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
--                        "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain);
--            return;
--            if (!instruction->declaration.tessellator_output_primitive
--                    || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
--                        "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive);
--            return;
--            if (!instruction->declaration.tessellator_partitioning
--                    || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
--                        "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning);
--            return;
--        default:
--            break;
--    }
--    /* Only DCL instructions may occur outside hull shader phases. */
--    if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL
--            && ctx->phase == VKD3DSIH_INVALID)
--        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
--                "Instruction %#x appear before any phase instruction in a hull shader.",
--                instruction->opcode);
-+        switch (instruction->opcode)
-+        {
-+            case VKD3DSIH_NOP:
-+            case VKD3DSIH_HS_DECLS:
-+            case VKD3DSIH_HS_FORK_PHASE:
-+            case VKD3DSIH_HS_JOIN_PHASE:
-+                break;
--    /* We support two different control flow types in shaders:
--     * block-based, like DXIL and SPIR-V, and structured, like D3DBC
--     * and TPF. The shader is detected as block-based when its first
--     * instruction, except for DCL_* and phases, is a LABEL. Currently
--     * we mandate that each shader is either purely block-based or
--     * purely structured. In principle we could allow structured
--     * constructs in a block, provided they are confined in a single
--     * block, but need for that hasn't arisen yet, so we don't. */
--    if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction))
--    {
--        if (instruction->opcode == VKD3DSIH_LABEL)
--            ctx->cf_type = CF_TYPE_BLOCKS;
--        else
--            ctx->cf_type = CF_TYPE_STRUCTURED;
-+            default:
-+                if (!vsir_instruction_is_dcl(instruction))
-+                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
-+                            "Instruction %#x appear before any phase instruction in a hull shader.",
-+                            instruction->opcode);
-+                break;
-+        }
-     }
--    if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction))
-+    if (ctx->program->cf_type == VSIR_CF_BLOCKS && !ctx->inside_block)
-     {
-         switch (instruction->opcode)
-         {
-+            case VKD3DSIH_NOP:
-             case VKD3DSIH_LABEL:
--                if (ctx->inside_block)
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block.");
--                ctx->inside_block = true;
--                break;
--            case VKD3DSIH_RET:
--            case VKD3DSIH_BRANCH:
--            case VKD3DSIH_SWITCH_MONOLITHIC:
--                if (!ctx->inside_block)
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
--                            "Invalid instruction %#x outside any block.",
--                            instruction->opcode);
--                ctx->inside_block = false;
-+            case VKD3DSIH_HS_DECLS:
-+            case VKD3DSIH_HS_FORK_PHASE:
-+            case VKD3DSIH_HS_JOIN_PHASE:
-                 break;
-             default:
--                if (!ctx->inside_block)
-+                if (!vsir_instruction_is_dcl(instruction))
-                     validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
-                             "Invalid instruction %#x outside any block.",
-                             instruction->opcode);
-@@ -6285,271 +8217,20 @@ static void vsir_validate_instruction(struct validation_context *ctx)
-         }
-     }
--    switch (instruction->opcode)
-+    if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions))
-     {
--        case VKD3DSIH_DCL_TEMPS:
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (ctx->dcl_temps_found)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction.");
--            if (instruction->declaration.count > ctx->program->temp_count)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS,
--                        "Invalid DCL_TEMPS count %u, expected at most %u.",
--                        instruction->declaration.count, ctx->program->temp_count);
--            ctx->dcl_temps_found = true;
--            break;
--        case VKD3DSIH_IF:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 1);
--            if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
--                return;
--            ctx->blocks[ctx->depth++] = instruction->opcode;
--            break;
--        case VKD3DSIH_IFC:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 2);
--            if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
--                return;
--            ctx->blocks[ctx->depth++] = VKD3DSIH_IF;
--            break;
--        case VKD3DSIH_ELSE:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block.");
--            else
--                ctx->blocks[ctx->depth - 1] = instruction->opcode;
--            break;
--        case VKD3DSIH_ENDIF:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE))
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block.");
--            else
--                --ctx->depth;
--            break;
--        case VKD3DSIH_LOOP:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0);
--            if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
--                return;
--            ctx->blocks[ctx->depth++] = instruction->opcode;
--            break;
--        case VKD3DSIH_ENDLOOP:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block.");
--            else
--                --ctx->depth;
--            break;
--        case VKD3DSIH_REP:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 1);
--            if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
--                return;
--            ctx->blocks[ctx->depth++] = instruction->opcode;
--            break;
--        case VKD3DSIH_ENDREP:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block.");
--            else
--                --ctx->depth;
--            break;
--        case VKD3DSIH_SWITCH:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 1);
--            if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
--                return;
--            ctx->blocks[ctx->depth++] = instruction->opcode;
--            break;
--        case VKD3DSIH_ENDSWITCH:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block.");
--            else
--                --ctx->depth;
--            break;
--        case VKD3DSIH_RET:
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 0);
--            break;
--        case VKD3DSIH_LABEL:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            vsir_validate_src_count(ctx, instruction, 1);
--            if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg))
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                        "Invalid register of type %#x in a LABEL instruction, expected LABEL.",
--                        instruction->src[0].reg.type);
--            break;
--        case VKD3DSIH_BRANCH:
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            if (!vsir_validate_src_min_count(ctx, instruction, 1))
--                break;
--            if (vsir_register_is_label(&instruction->src[0].reg))
--            {
--                /* Unconditional branch: parameters are jump label,
--                 * optional merge label, optional continue label. */
--                vsir_validate_src_max_count(ctx, instruction, 3);
--                for (i = 0; i < instruction->src_count; ++i)
--                {
--                    if (!vsir_register_is_label(&instruction->src[i].reg))
--                        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                                "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.",
--                                instruction->src[i].reg.type);
--                }
--            }
--            else
--            {
--                /* Conditional branch: parameters are condition, true
--                 * jump label, false jump label, optional merge label,
--                 * optional continue label. */
--                vsir_validate_src_min_count(ctx, instruction, 3);
--                vsir_validate_src_max_count(ctx, instruction, 5);
--                for (i = 1; i < instruction->src_count; ++i)
--                {
--                    if (!vsir_register_is_label(&instruction->src[i].reg))
--                        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                                "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.",
--                                instruction->src[i].reg.type);
--                }
--            }
--            break;
--        {
--            unsigned int case_count;
-+        const struct vsir_validator_instruction_desc *desc;
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
--            vsir_validate_dst_count(ctx, instruction, 0);
--            /* Parameters are source, default label, merge label and
--             * then pairs of constant value and case label. */
--            if (!vsir_validate_src_min_count(ctx, instruction, 3))
--                break;
--            if (instruction->src_count % 2 != 1)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
--                        "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.",
--                        instruction->src_count);
--            if (!vsir_register_is_label(&instruction->src[1].reg))
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                        "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
--                        instruction->src[1].reg.type);
--            if (!vsir_register_is_label(&instruction->src[2].reg))
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                        "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
--                        instruction->src[2].reg.type);
--            case_count = (instruction->src_count - 3) / 2;
--            for (i = 0; i < case_count; ++i)
--            {
--                unsigned int value_idx = 3 + 2 * i;
--                unsigned int label_idx = 3 + 2 * i + 1;
--                if (!register_is_constant(&instruction->src[value_idx].reg))
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                            "Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, "
--                            "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
--                if (!vsir_register_is_label(&instruction->src[label_idx].reg))
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                            "Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, "
--                            "expected LABEL.", i, instruction->src[value_idx].reg.type);
--            }
--            break;
--        }
-+        desc = &vsir_validator_instructions[instruction->opcode];
--        case VKD3DSIH_PHI:
-+        if (desc->validate)
-         {
--            unsigned int incoming_count;
--            vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
--            vsir_validate_dst_count(ctx, instruction, 1);
--            vsir_validate_src_min_count(ctx, instruction, 2);
--            if (instruction->src_count % 2 != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
--                        "Invalid source count %u for a PHI instruction, it must be an even number.",
--                        instruction->src_count);
--            incoming_count = instruction->src_count / 2;
--            if (!register_is_ssa(&instruction->dst[0].reg))
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                        "Invalid destination of type %#x in PHI instruction, expected SSA.",
--                        instruction->dst[0].reg.type);
--            if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
--                        "Invalid destination dimension %#x in PHI instruction, expected scalar.",
--                        instruction->dst[0].reg.dimension);
--            if (instruction->dst[0].modifiers != VKD3DSPDM_NONE)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS,
--                        "Invalid modifiers %#x for the destination of a PHI instruction, expected none.",
--                        instruction->dst[0].modifiers);
--            if (instruction->dst[0].shift != 0)
--                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT,
--                        "Invalid shift %#x for the destination of a PHI instruction, expected none.",
--                        instruction->dst[0].shift);
--            for (i = 0; i < incoming_count; ++i)
--            {
--                unsigned int value_idx = 2 * i;
--                unsigned int label_idx = 2 * i + 1;
--                if (!register_is_constant_or_undef(&instruction->src[value_idx].reg)
--                        && !register_is_ssa(&instruction->src[value_idx].reg))
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                            "Invalid value register for incoming %zu of type %#x in PHI instruction, "
--                            "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
--                if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR)
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
--                            "Invalid value dimension %#x for incoming %zu in PHI instruction, expected scalar.",
--                            instruction->src[value_idx].reg.dimension, i);
--                if (!vsir_register_is_label(&instruction->src[label_idx].reg))
--                    validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
--                            "Invalid label register for case %zu of type %#x in PHI instruction, "
--                            "expected LABEL.", i, instruction->src[value_idx].reg.type);
--            }
--            break;
-+            if (desc->dst_param_count != ~0u)
-+                vsir_validate_dst_count(ctx, instruction, desc->dst_param_count);
-+            if (desc->src_param_count != ~0u)
-+                vsir_validate_src_count(ctx, instruction, desc->src_param_count);
-+            desc->validate(ctx, instruction);
-         }
--        default:
--            break;
-     }
- }
-@@ -6563,19 +8244,84 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c
-         .null_location = {.source_name = source_name},
-         .status = VKD3D_OK,
-         .phase = VKD3DSIH_INVALID,
-+        .invalid_instruction_idx = true,
-+        .outer_tess_idxs[0] = ~0u,
-+        .outer_tess_idxs[1] = ~0u,
-+        .outer_tess_idxs[2] = ~0u,
-+        .outer_tess_idxs[3] = ~0u,
-+        .inner_tess_idxs[0] = ~0u,
-+        .inner_tess_idxs[1] = ~0u,
-     };
-     unsigned int i;
-     if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION))
-         return VKD3D_OK;
-+    switch (program->shader_version.type)
-+    {
-+        case VKD3D_SHADER_TYPE_HULL:
-+        case VKD3D_SHADER_TYPE_DOMAIN:
-+            if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID
-+                    || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT)
-+                validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                        "Invalid tessellation domain %#x.", program->tess_domain);
-+            break;
-+        default:
-+            if (program->patch_constant_signature.element_count != 0)
-+                validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "Patch constant signature is only valid for hull and domain shaders.");
-+            if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID)
-+                validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
-+                        "Invalid tessellation domain %#x.", program->tess_domain);
-+    }
-+    switch (program->shader_version.type)
-+    {
-+        case VKD3D_SHADER_TYPE_DOMAIN:
-+            break;
-+        case VKD3D_SHADER_TYPE_HULL:
-+            if (program->input_control_point_count == 0)
-+                validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "Invalid zero input control point count.");
-+            break;
-+        default:
-+            if (program->input_control_point_count != 0)
-+                validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "Invalid input control point count %u.",
-+                        program->input_control_point_count);
-+    }
-+    switch (program->shader_version.type)
-+    {
-+        case VKD3D_SHADER_TYPE_HULL:
-+            break;
-+        default:
-+            if (program->output_control_point_count != 0)
-+                validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
-+                        "Invalid output control point count %u.",
-+                        program->output_control_point_count);
-+    }
-+    vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT);
-+    vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT);
-+    vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT);
-     if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps))))
-         goto fail;
-     if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas))))
-         goto fail;
--    for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx)
-+    ctx.invalid_instruction_idx = false;
-+    for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count
-+            && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx)
-         vsir_validate_instruction(&ctx);
-     ctx.invalid_instruction_idx = true;
-@@ -6610,74 +8356,107 @@ fail:
- }
--enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags,
--        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
-+#define vsir_transform(ctx, step) vsir_transform_(ctx, #step, step)
-+static void vsir_transform_(
-+        struct vsir_transformation_context *ctx, const char *step_name,
-+        enum vkd3d_result (*step)(struct vsir_program *program, struct vsir_transformation_context *ctx))
- {
--    enum vkd3d_result result = VKD3D_OK;
-+    if (ctx->result < 0)
-+        return;
--    if ((result = vsir_program_lower_instructions(program, message_context)) < 0)
--        return result;
-+    if ((ctx->result = step(ctx->program, ctx)) < 0)
-+    {
-+        WARN("Transformation \"%s\" failed with result %d.\n", step_name, ctx->result);
-+        return;
-+    }
--    if (program->shader_version.major >= 6)
-+    if ((ctx->result = vsir_program_validate(ctx->program, ctx->config_flags,
-+            ctx->compile_info->source_name, ctx->message_context)) < 0)
-+    {
-+        WARN("Validation failed with result %d after transformation \"%s\".\n", ctx->result, step_name);
-+        return;
-+    }
-+/* Transformations which should happen at parse time, i.e. before scan
-+ * information is returned to the user.
-+ *
-+ * In particular, some passes need to modify the signature, and
-+ * vkd3d_shader_scan() should report the modified signature for the given
-+ * target. */
-+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
-+    struct vsir_transformation_context ctx =
-     {
--        if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0)
--            return result;
-+        .result = VKD3D_OK,
-+        .program = program,
-+        .config_flags = config_flags,
-+        .compile_info = compile_info,
-+        .message_context = message_context,
-+    };
--        if ((result = lower_switch_to_if_ladder(program)) < 0)
--            return result;
-+    /* For vsir_program_ensure_diffuse(). */
-+    if (program->shader_version.major <= 2)
-+        vsir_transform(&ctx, vsir_program_add_diffuse_output);
--        if ((result = vsir_program_structurize(program, message_context)) < 0)
--            return result;
-+    return ctx.result;
-+enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
-+    struct vsir_transformation_context ctx =
-+    {
-+        .result = VKD3D_OK,
-+        .program = program,
-+        .config_flags = config_flags,
-+        .compile_info = compile_info,
-+        .message_context = message_context,
-+    };
--        if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
--            return result;
-+    vsir_transform(&ctx, vsir_program_lower_instructions);
--        if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0)
--            return result;
-+    if (program->shader_version.major >= 6)
-+    {
-+        vsir_transform(&ctx, vsir_program_materialise_phi_ssas_to_temps);
-+        vsir_transform(&ctx, vsir_program_lower_switch_to_selection_ladder);
-+        vsir_transform(&ctx, vsir_program_structurize);
-+        vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs);
-+        vsir_transform(&ctx, vsir_program_materialize_undominated_ssas_to_temps);
-     }
-     else
-     {
--        if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
--        {
--            if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0)
--                return result;
--        }
--        if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL)
--        {
--            if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0)
--                return result;
-+        vsir_transform(&ctx, vsir_program_ensure_ret);
--            if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions,
--                    &program->input_signature)) < 0)
--                return result;
--        }
--        if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0)
--            return result;
-+        if (program->shader_version.major <= 2)
-+            vsir_transform(&ctx, vsir_program_ensure_diffuse);
--        if ((result = instruction_array_normalise_flat_constants(program)) < 0)
--            return result;
-+        if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
-+            vsir_transform(&ctx, vsir_program_remap_output_signature);
--        remove_dead_code(program);
-+        if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL)
-+            vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases);
--        if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
--            return result;
-+        vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io);
-+        vsir_transform(&ctx, vsir_program_normalise_io_registers);
-+        vsir_transform(&ctx, vsir_program_normalise_flat_constants);
-+        vsir_transform(&ctx, vsir_program_remove_dead_code);
-         if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL
--                && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
--            return result;
-+                && compile_info->target_type != VKD3D_SHADER_TARGET_MSL)
-+            vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs);
-     }
--    if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0)
--        return result;
-+    vsir_transform(&ctx, vsir_program_apply_flat_interpolation);
-+    vsir_transform(&ctx, vsir_program_insert_alpha_test);
-+    vsir_transform(&ctx, vsir_program_insert_clip_planes);
-+    vsir_transform(&ctx, vsir_program_insert_point_size);
-+    vsir_transform(&ctx, vsir_program_insert_point_size_clamp);
-+    vsir_transform(&ctx, vsir_program_insert_point_coord);
-     if (TRACE_ON())
--        vkd3d_shader_trace(program);
--    if ((result = vsir_program_validate(program, config_flags,
--            compile_info->source_name, message_context)) < 0)
--        return result;
-+        vsir_program_trace(program);
--    return result;
-+    return ctx.result;
- }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
-new file mode 100644
-index 00000000000..df3edeaa4e6
---- /dev/null
-+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
-@@ -0,0 +1,898 @@
-+/*
-+ * Copyright 2024 Feifan He for CodeWeavers
-+ *
-+ * This library is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * This library is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with this library; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
-+ */
-+#include "vkd3d_shader_private.h"
-+struct msl_src
-+    struct vkd3d_string_buffer *str;
-+struct msl_dst
-+    const struct vkd3d_shader_dst_param *vsir;
-+    struct vkd3d_string_buffer *register_name;
-+    struct vkd3d_string_buffer *mask;
-+struct msl_generator
-+    struct vsir_program *program;
-+    struct vkd3d_string_buffer_cache string_buffers;
-+    struct vkd3d_string_buffer *buffer;
-+    struct vkd3d_shader_location location;
-+    struct vkd3d_shader_message_context *message_context;
-+    unsigned int indent;
-+    const char *prefix;
-+    bool failed;
-+    const struct vkd3d_shader_interface_info *interface_info;
-+    const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info;
-+static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen,
-+        enum vkd3d_shader_error error, const char *fmt, ...)
-+    va_list args;
-+    va_start(args, fmt);
-+    vkd3d_shader_verror(gen->message_context, &gen->location, error, fmt, args);
-+    va_end(args);
-+    gen->failed = true;
-+static const char *msl_get_prefix(enum vkd3d_shader_type type)
-+    switch (type)
-+    {
-+        case VKD3D_SHADER_TYPE_VERTEX:
-+            return "vs";
-+        case VKD3D_SHADER_TYPE_HULL:
-+            return "hs";
-+        case VKD3D_SHADER_TYPE_DOMAIN:
-+            return "ds";
-+            return "gs";
-+        case VKD3D_SHADER_TYPE_PIXEL:
-+            return "ps";
-+            return "cs";
-+        default:
-+            return NULL;
-+    }
-+static void msl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent)
-+    vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, "");
-+static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer,
-+        struct msl_generator *gen, enum vkd3d_data_type data_type)
-+    vkd3d_string_buffer_printf(buffer, ".");
-+    switch (data_type)
-+    {
-+        case VKD3D_DATA_FLOAT:
-+            vkd3d_string_buffer_printf(buffer, "f");
-+            break;
-+        case VKD3D_DATA_INT:
-+            vkd3d_string_buffer_printf(buffer, "i");
-+            break;
-+        case VKD3D_DATA_UINT:
-+            vkd3d_string_buffer_printf(buffer, "u");
-+            break;
-+        default:
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled register datatype %#x.", data_type);
-+            vkd3d_string_buffer_printf(buffer, "<unrecognised register datatype %#x>", data_type);
-+            break;
-+    }
-+static void msl_print_register_name(struct vkd3d_string_buffer *buffer,
-+        struct msl_generator *gen, const struct vkd3d_shader_register *reg)
-+    switch (reg->type)
-+    {
-+        case VKD3DSPR_TEMP:
-+            vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset);
-+            msl_print_register_datatype(buffer, gen, reg->data_type);
-+            break;
-+        case VKD3DSPR_INPUT:
-+            if (reg->idx_count != 1)
-+            {
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled input register index count %u.", reg->idx_count);
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            if (reg->idx[0].rel_addr)
-+            {
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled input register indirect addressing.");
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "v[%u]", reg->idx[0].offset);
-+            msl_print_register_datatype(buffer, gen, reg->data_type);
-+            break;
-+        case VKD3DSPR_OUTPUT:
-+            if (reg->idx_count != 1)
-+            {
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled output register index count %u.", reg->idx_count);
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            if (reg->idx[0].rel_addr)
-+            {
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled output register indirect addressing.");
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "o[%u]", reg->idx[0].offset);
-+            msl_print_register_datatype(buffer, gen, reg->data_type);
-+            break;
-+        case VKD3DSPR_CONSTBUFFER:
-+            if (reg->idx_count != 3)
-+            {
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count);
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            if (reg->idx[0].rel_addr || reg->idx[2].rel_addr)
-+            {
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled constant buffer register indirect addressing.");
-+                vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type);
-+                break;
-+            }
-+            vkd3d_string_buffer_printf(buffer, "descriptors.cb_%u[%u]", reg->idx[0].offset, reg->idx[2].offset);
-+            msl_print_register_datatype(buffer, gen, reg->data_type);
-+            break;
-+        default:
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled register type %#x.", reg->type);
-+            vkd3d_string_buffer_printf(buffer, "<unrecognised register %#x>", reg->type);
-+            break;
-+    }
-+static void msl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask)
-+    const char swizzle_chars[] = "xyzw";
-+    unsigned int i;
-+    vkd3d_string_buffer_printf(buffer, ".");
-+    for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
-+    {
-+        if (mask & (VKD3DSP_WRITEMASK_0 << i))
-+            vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]);
-+    }
-+static void msl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask)
-+    vkd3d_string_buffer_printf(buffer, ".");
-+    if (write_mask & VKD3DSP_WRITEMASK_0)
-+        vkd3d_string_buffer_printf(buffer, "x");
-+    if (write_mask & VKD3DSP_WRITEMASK_1)
-+        vkd3d_string_buffer_printf(buffer, "y");
-+    if (write_mask & VKD3DSP_WRITEMASK_2)
-+        vkd3d_string_buffer_printf(buffer, "z");
-+    if (write_mask & VKD3DSP_WRITEMASK_3)
-+        vkd3d_string_buffer_printf(buffer, "w");
-+static void msl_src_cleanup(struct msl_src *src, struct vkd3d_string_buffer_cache *cache)
-+    vkd3d_string_buffer_release(cache, src->str);
-+static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen,
-+        const struct vkd3d_shader_src_param *vsir_src, uint32_t mask)
-+    const struct vkd3d_shader_register *reg = &vsir_src->reg;
-+    msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers);
-+    if (reg->non_uniform)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled 'non-uniform' modifier.");
-+    if (vsir_src->modifiers)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers);
-+    msl_print_register_name(msl_src->str, gen, reg);
-+    if (reg->dimension == VSIR_DIMENSION_VEC4)
-+        msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask);
-+static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache)
-+    vkd3d_string_buffer_release(cache, dst->mask);
-+    vkd3d_string_buffer_release(cache, dst->register_name);
-+static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen,
-+        const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst)
-+    uint32_t write_mask = vsir_dst->write_mask;
-+    if (ins->flags & VKD3DSI_PRECISE_XYZW)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled 'precise' modifier.");
-+    if (vsir_dst->reg.non_uniform)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled 'non-uniform' modifier.");
-+    msl_dst->vsir = vsir_dst;
-+    msl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers);
-+    msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers);
-+    msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg);
-+    msl_print_write_mask(msl_dst->mask, write_mask);
-+    return write_mask;
-+static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment(
-+        struct msl_generator *gen, struct msl_dst *dst, const char *format, ...)
-+    va_list args;
-+    if (dst->vsir->shift)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift);
-+    if (dst->vsir->modifiers)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers);
-+    msl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer);
-+    va_start(args, format);
-+    vkd3d_string_buffer_vprintf(gen->buffer, format, args);
-+    va_end(args);
-+    vkd3d_string_buffer_printf(gen->buffer, ";\n");
-+static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    msl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "/* <unhandled instruction %#x> */\n", ins->opcode);
-+    msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+            "Internal compiler error: Unhandled instruction %#x.", ins->opcode);
-+static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    struct msl_src src;
-+    struct msl_dst dst;
-+    uint32_t mask;
-+    mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
-+    msl_src_init(&src, gen, &ins->src[0], mask);
-+    msl_print_assignment(gen, &dst, "%s", src.str->buffer);
-+    msl_src_cleanup(&src, &gen->string_buffers);
-+    msl_dst_cleanup(&dst, &gen->string_buffers);
-+static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    msl_print_indent(gen->buffer, gen->indent);
-+    vkd3d_string_buffer_printf(gen->buffer, "return;\n");
-+static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
-+    gen->location = ins->location;
-+    switch (ins->opcode)
-+    {
-+        case VKD3DSIH_DCL_INPUT:
-+        case VKD3DSIH_DCL_OUTPUT:
-+        case VKD3DSIH_DCL_OUTPUT_SIV:
-+        case VKD3DSIH_NOP:
-+            break;
-+        case VKD3DSIH_MOV:
-+            msl_mov(gen, ins);
-+            break;
-+        case VKD3DSIH_RET:
-+            msl_ret(gen, ins);
-+            break;
-+        default:
-+            msl_unhandled(gen, ins);
-+            break;
-+    }
-+static bool msl_check_shader_visibility(const struct msl_generator *gen,
-+        enum vkd3d_shader_visibility visibility)
-+    enum vkd3d_shader_type t = gen->program->shader_version.type;
-+    switch (visibility)
-+    {
-+            return true;
-+            return t == VKD3D_SHADER_TYPE_VERTEX;
-+            return t == VKD3D_SHADER_TYPE_HULL;
-+            return t == VKD3D_SHADER_TYPE_DOMAIN;
-+            return t == VKD3D_SHADER_TYPE_GEOMETRY;
-+            return t == VKD3D_SHADER_TYPE_PIXEL;
-+            return t == VKD3D_SHADER_TYPE_COMPUTE;
-+        default:
-+            WARN("Invalid shader visibility %#x.\n", visibility);
-+            return false;
-+    }
-+static bool msl_get_cbv_binding(const struct msl_generator *gen,
-+        unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx)
-+    const struct vkd3d_shader_interface_info *interface_info = gen->interface_info;
-+    const struct vkd3d_shader_resource_binding *binding;
-+    unsigned int i;
-+    if (!interface_info)
-+        return false;
-+    for (i = 0; i < interface_info->binding_count; ++i)
-+    {
-+        binding = &interface_info->bindings[i];
-+        if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV)
-+            continue;
-+        if (binding->register_space != register_space)
-+            continue;
-+        if (binding->register_index != register_idx)
-+            continue;
-+        if (!msl_check_shader_visibility(gen, binding->shader_visibility))
-+            continue;
-+        if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER))
-+            continue;
-+        *binding_idx = i;
-+        return true;
-+    }
-+    return false;
-+static void msl_generate_cbv_declaration(struct msl_generator *gen,
-+        const struct vkd3d_shader_descriptor_info1 *cbv)
-+    const struct vkd3d_shader_descriptor_binding *binding;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    unsigned int binding_idx;
-+    size_t size;
-+    if (cbv->count != 1)
-+    {
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND,
-+                "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count);
-+        return;
-+    }
-+    if (!msl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx))
-+    {
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND,
-+                "No descriptor binding specified for constant buffer %u.", cbv->register_id);
-+        return;
-+    }
-+    binding = &gen->interface_info->bindings[binding_idx].binding;
-+    if (binding->set != 0)
-+    {
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND,
-+                "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id);
-+        return;
-+    }
-+    if (binding->count != 1)
-+    {
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND,
-+                "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id);
-+        return;
-+    }
-+    size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t));
-+    size /= VKD3D_VEC4_SIZE * sizeof(uint32_t);
-+    vkd3d_string_buffer_printf(buffer,
-+            "constant vkd3d_vec4 *cb_%u [[id(%u)]];", cbv->register_id, binding->binding);
-+static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen)
-+    const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info;
-+    const struct vkd3d_shader_descriptor_info1 *descriptor;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    unsigned int i;
-+    if (!info->descriptor_count)
-+        return;
-+    vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_descriptors\n{\n", gen->prefix);
-+    for (i = 0; i < info->descriptor_count; ++i)
-+    {
-+        descriptor = &info->descriptors[i];
-+        msl_print_indent(buffer, 1);
-+        switch (descriptor->type)
-+        {
-+                msl_generate_cbv_declaration(gen, descriptor);
-+                break;
-+            default:
-+                vkd3d_string_buffer_printf(buffer, "/* <unhandled descriptor type %#x> */", descriptor->type);
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type);
-+                break;
-+        }
-+        vkd3d_string_buffer_printf(buffer, "\n");
-+    }
-+    vkd3d_string_buffer_printf(buffer, "};\n\n");
-+static void msl_generate_input_struct_declarations(struct msl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->input_signature;
-+    enum vkd3d_shader_type type = gen->program->shader_version.type;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i;
-+    vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_in\n{\n", gen->prefix);
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+            continue;
-+        if (e->sysval_semantic)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic);
-+            continue;
-+        }
-+        if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision);
-+            continue;
-+        }
-+        if (e->interpolation_mode != VKD3DSIM_NONE)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode);
-+            continue;
-+        }
-+        if(e->register_count > 1)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled register count %u.", e->register_count);
-+            continue;
-+        }
-+        msl_print_indent(gen->buffer, 1);
-+        switch(e->component_type)
-+        {
-+            case VKD3D_SHADER_COMPONENT_FLOAT:
-+                vkd3d_string_buffer_printf(buffer, "float4 ");
-+                break;
-+            case VKD3D_SHADER_COMPONENT_INT:
-+                vkd3d_string_buffer_printf(buffer, "int4 ");
-+                break;
-+            case VKD3D_SHADER_COMPONENT_UINT:
-+                vkd3d_string_buffer_printf(buffer, "uint4 ");
-+                break;
-+            default:
-+                vkd3d_string_buffer_printf(buffer, "<unhandled component type %#x> ", e->component_type);
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled component type %#x.", e->component_type);
-+                break;
-+        }
-+        vkd3d_string_buffer_printf(buffer, "shader_in_%u ", i);
-+        switch (type)
-+        {
-+            case VKD3D_SHADER_TYPE_VERTEX:
-+                vkd3d_string_buffer_printf(gen->buffer, "[[attribute(%u)]]", e->target_location);
-+                break;
-+            case VKD3D_SHADER_TYPE_PIXEL:
-+                vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location);
-+                break;
-+            default:
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled shader type %#x.", type);
-+                break;
-+        }
-+        vkd3d_string_buffer_printf(buffer, ";\n");
-+    }
-+    vkd3d_string_buffer_printf(buffer, "};\n\n");
-+static void msl_generate_vertex_output_element_attribute(struct msl_generator *gen, const struct signature_element *e)
-+    switch (e->sysval_semantic)
-+    {
-+        case VKD3D_SHADER_SV_POSITION:
-+            vkd3d_string_buffer_printf(gen->buffer, "[[position]]");
-+            break;
-+        case VKD3D_SHADER_SV_NONE:
-+            vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location);
-+            break;
-+        default:
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled vertex shader system value %#x.", e->sysval_semantic);
-+            break;
-+    }
-+static void msl_generate_pixel_output_element_attribute(struct msl_generator *gen, const struct signature_element *e)
-+    switch (e->sysval_semantic)
-+    {
-+        case VKD3D_SHADER_SV_TARGET:
-+            vkd3d_string_buffer_printf(gen->buffer, "[[color(%u)]]", e->target_location);
-+            break;
-+        default:
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled pixel shader system value %#x.", e->sysval_semantic);
-+            break;
-+    }
-+static void msl_generate_output_struct_declarations(struct msl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->output_signature;
-+    enum vkd3d_shader_type type = gen->program->shader_version.type;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i;
-+    vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_out\n{\n", gen->prefix);
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+            continue;
-+        if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision);
-+            continue;
-+        }
-+        if (e->interpolation_mode != VKD3DSIM_NONE)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode);
-+            continue;
-+        }
-+        if(e->register_count > 1)
-+        {
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled register count %u.", e->register_count);
-+            continue;
-+        }
-+        msl_print_indent(gen->buffer, 1);
-+        switch(e->component_type)
-+        {
-+            case VKD3D_SHADER_COMPONENT_FLOAT:
-+                vkd3d_string_buffer_printf(buffer, "float4 ");
-+                break;
-+            case VKD3D_SHADER_COMPONENT_INT:
-+                vkd3d_string_buffer_printf(buffer, "int4 ");
-+                break;
-+            case VKD3D_SHADER_COMPONENT_UINT:
-+                vkd3d_string_buffer_printf(buffer, "uint4 ");
-+                break;
-+            default:
-+                vkd3d_string_buffer_printf(buffer, "<unhandled component type %#x> ", e->component_type);
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled component type %#x.", e->component_type);
-+                break;
-+        }
-+        vkd3d_string_buffer_printf(buffer, "shader_out_%u ", i);
-+        switch (type)
-+        {
-+            case VKD3D_SHADER_TYPE_VERTEX:
-+                msl_generate_vertex_output_element_attribute(gen, e);
-+                break;
-+            case VKD3D_SHADER_TYPE_PIXEL:
-+                msl_generate_pixel_output_element_attribute(gen, e);
-+                break;
-+            default:
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled shader type %#x.", type);
-+                break;
-+        }
-+        vkd3d_string_buffer_printf(buffer, ";\n");
-+    }
-+    vkd3d_string_buffer_printf(buffer, "};\n\n");
-+static void msl_generate_entrypoint_prologue(struct msl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->input_signature;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i;
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+            continue;
-+        vkd3d_string_buffer_printf(buffer, "    %s_in[%u]", gen->prefix, e->register_index);
-+        if (e->sysval_semantic == VKD3D_SHADER_SV_NONE)
-+        {
-+            msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type));
-+            msl_print_write_mask(buffer, e->mask);
-+            vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i);
-+            msl_print_write_mask(buffer, e->mask);
-+        }
-+        else
-+        {
-+            vkd3d_string_buffer_printf(buffer, " = <unhandled sysval %#x>", e->sysval_semantic);
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic);
-+        }
-+        vkd3d_string_buffer_printf(buffer, ";\n");
-+    }
-+static void msl_generate_entrypoint_epilogue(struct msl_generator *gen)
-+    const struct shader_signature *signature = &gen->program->output_signature;
-+    struct vkd3d_string_buffer *buffer = gen->buffer;
-+    const struct signature_element *e;
-+    unsigned int i;
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        e = &signature->elements[i];
-+        if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED)
-+            continue;
-+        switch (e->sysval_semantic)
-+        {
-+            case VKD3D_SHADER_SV_NONE:
-+            case VKD3D_SHADER_SV_TARGET:
-+            case VKD3D_SHADER_SV_POSITION:
-+                vkd3d_string_buffer_printf(buffer, "    output.shader_out_%u", i);
-+                msl_print_write_mask(buffer, e->mask);
-+                vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index);
-+                msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type));
-+                msl_print_write_mask(buffer, e->mask);
-+                break;
-+            default:
-+                vkd3d_string_buffer_printf(buffer, "    <unhandled sysval %#x>", e->sysval_semantic);
-+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                        "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic);
-+        }
-+        vkd3d_string_buffer_printf(buffer, ";\n");
-+    }
-+static void msl_generate_entrypoint(struct msl_generator *gen)
-+    enum vkd3d_shader_type type = gen->program->shader_version.type;
-+    switch (type)
-+    {
-+        case VKD3D_SHADER_TYPE_VERTEX:
-+            vkd3d_string_buffer_printf(gen->buffer, "vertex ");
-+            break;
-+        case VKD3D_SHADER_TYPE_PIXEL:
-+            vkd3d_string_buffer_printf(gen->buffer, "fragment ");
-+            break;
-+        default:
-+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                    "Internal compiler error: Unhandled shader type %#x.", type);
-+            return;
-+    }
-+    vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix);
-+    if (gen->descriptor_info->descriptor_count)
-+    {
-+        msl_print_indent(gen->buffer, 2);
-+        /* TODO: Configurable argument buffer binding location. */
-+        vkd3d_string_buffer_printf(gen->buffer,
-+                "constant vkd3d_%s_descriptors& descriptors [[buffer(0)]],\n", gen->prefix);
-+    }
-+    msl_print_indent(gen->buffer, 2);
-+    vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input [[stage_in]])\n{\n", gen->prefix);
-+    /* TODO: declare #maximum_register + 1 */
-+    vkd3d_string_buffer_printf(gen->buffer, "    vkd3d_vec4 %s_in[%u];\n", gen->prefix, 32);
-+    vkd3d_string_buffer_printf(gen->buffer, "    vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32);
-+    vkd3d_string_buffer_printf(gen->buffer, "    vkd3d_%s_out output;\n", gen->prefix);
-+    msl_generate_entrypoint_prologue(gen);
-+    vkd3d_string_buffer_printf(gen->buffer, "    %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix);
-+    if (gen->descriptor_info->descriptor_count)
-+        vkd3d_string_buffer_printf(gen->buffer, ", descriptors");
-+    vkd3d_string_buffer_printf(gen->buffer, ");\n");
-+    msl_generate_entrypoint_epilogue(gen);
-+    vkd3d_string_buffer_printf(gen->buffer, "    return output;\n}\n");
-+static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader_code *out)
-+    const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions;
-+    unsigned int i;
-+    MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
-+    vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL));
-+    if (gen->program->global_flags)
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)gen->program->global_flags);
-+    vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_vec4\n{\n");
-+    vkd3d_string_buffer_printf(gen->buffer, "    uint4 u;\n");
-+    vkd3d_string_buffer_printf(gen->buffer, "    int4 i;\n");
-+    vkd3d_string_buffer_printf(gen->buffer, "    float4 f;\n};\n\n");
-+    msl_generate_descriptor_struct_declarations(gen);
-+    msl_generate_input_struct_declarations(gen);
-+    msl_generate_output_struct_declarations(gen);
-+    vkd3d_string_buffer_printf(gen->buffer,
-+            "void %s_main(thread vkd3d_vec4 *v, "
-+            "thread vkd3d_vec4 *o",
-+            gen->prefix);
-+    if (gen->descriptor_info->descriptor_count)
-+        vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix);
-+    vkd3d_string_buffer_printf(gen->buffer, ")\n{\n");
-+    ++gen->indent;
-+    if (gen->program->temp_count)
-+    {
-+        msl_print_indent(gen->buffer, gen->indent);
-+        vkd3d_string_buffer_printf(gen->buffer, "vkd3d_vec4 r[%u];\n\n", gen->program->temp_count);
-+    }
-+    for (i = 0; i < instructions->count; ++i)
-+    {
-+        msl_handle_instruction(gen, &instructions->elements[i]);
-+    }
-+    --gen->indent;
-+    vkd3d_string_buffer_printf(gen->buffer, "}\n\n");
-+    msl_generate_entrypoint(gen);
-+    if (TRACE_ON())
-+        vkd3d_string_buffer_trace(gen->buffer);
-+    if (gen->failed)
-+        return VKD3D_ERROR_INVALID_SHADER;
-+    vkd3d_shader_code_from_string_buffer(out, gen->buffer);
-+    return VKD3D_OK;
-+static void msl_generator_cleanup(struct msl_generator *gen)
-+    vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer);
-+    vkd3d_string_buffer_cache_cleanup(&gen->string_buffers);
-+static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program,
-+        const struct vkd3d_shader_compile_info *compile_info,
-+        const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
-+        struct vkd3d_shader_message_context *message_context)
-+    enum vkd3d_shader_type type = program->shader_version.type;
-+    memset(gen, 0, sizeof(*gen));
-+    gen->program = program;
-+    vkd3d_string_buffer_cache_init(&gen->string_buffers);
-+    if (!(gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers)))
-+    {
-+        vkd3d_string_buffer_cache_cleanup(&gen->string_buffers);
-+        return VKD3D_ERROR_OUT_OF_MEMORY;
-+    }
-+    gen->message_context = message_context;
-+    if (!(gen->prefix = msl_get_prefix(type)))
-+    {
-+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
-+                "Internal compiler error: Unhandled shader type %#x.", type);
-+        return VKD3D_ERROR_INVALID_SHADER;
-+    }
-+    gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO);
-+    gen->descriptor_info = descriptor_info;
-+    return VKD3D_OK;
-+int msl_compile(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
-+        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
-+        struct vkd3d_shader_message_context *message_context)
-+    struct msl_generator generator;
-+    int ret;
-+    if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0)
-+        return ret;
-+    VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
-+    if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0)
-+        return ret;
-+    ret = msl_generator_generate(&generator, out);
-+    msl_generator_cleanup(&generator);
-+    return ret;
-diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h
-index 9806614a35b..a98c8ae3df5 100644
---- a/libs/vkd3d/libs/vkd3d-shader/preproc.h
-+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h
-@@ -60,6 +60,7 @@ struct preproc_expansion
- {
-     struct preproc_buffer buffer;
-     const struct preproc_text *text;
-+    struct preproc_text *arg_values;
-     /* Back-pointer to the macro, if this expansion a macro body. This is
-      * necessary so that argument tokens can be correctly replaced. */
-     struct preproc_macro *macro;
-@@ -72,7 +73,6 @@ struct preproc_macro
-     char **arg_names;
-     size_t arg_count;
--    struct preproc_text *arg_values;
-     struct preproc_text body;
- };
-@@ -117,6 +117,7 @@ struct preproc_ctx
-             STATE_ARGS,
-         } state;
-         unsigned int paren_depth;
-+        struct preproc_text *arg_values;
-     } text_func, directive_func;
-     int current_directive;
-diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l
-index 2b7455a5c30..d167415c356 100644
---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l
-+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l
-@@ -20,6 +20,7 @@
- %{
-+#include "preproc.h"
- #include "preproc.tab.h"
- #undef ERROR  /* defined in wingdi.h */
-@@ -29,11 +30,11 @@
- #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner)
--static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx)
-+static struct preproc_expansion *preproc_get_top_expansion(struct preproc_ctx *ctx)
- {
-     if (!ctx->expansion_count)
-         return NULL;
--    return ctx->expansion_stack[ctx->expansion_count - 1].macro;
-+    return &ctx->expansion_stack[ctx->expansion_count - 1];
- }
- static void update_location(struct preproc_ctx *ctx);
-@@ -66,7 +67,7 @@ static void update_location(struct preproc_ctx *ctx);
- NEWLINE         \r?\n
- WS              [ \t\r]
--IDENTIFIER      [A-Za-z_][A-Za-z0-9_]*
-+IDENTIFIER      (::)?[A-Za-z_]((::)?[A-Za-z0-9_]+)*
- INT_SUFFIX      [uUlL]{0,2}
- %%
-@@ -132,14 +133,14 @@ INT_SUFFIX      [uUlL]{0,2}
-         if (!ctx->last_was_newline)
-         {
--            struct preproc_macro *macro;
-+            struct preproc_expansion *exp;
-             /* Stringification is only done for function-like macro bodies.
-              * Anywhere else, we need to parse it as two separate tokens.
-              * We could use a state for this, but yyless() is easier and cheap.
-              */
--            if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count)
-+            if ((exp = preproc_get_top_expansion(ctx)) && exp->macro && exp->macro->arg_count)
-                 return T_HASHSTRING;
-             yyless(1);
-@@ -258,6 +259,12 @@ static void preproc_pop_buffer(struct preproc_ctx *ctx)
-         yy_delete_buffer(exp->buffer.lexer_buffer, ctx->scanner);
-+        if (exp->macro)
-+        {
-+            for (unsigned int i = 0; i < exp->macro->arg_count; ++i)
-+                vkd3d_string_buffer_cleanup(&exp->arg_values[i].text);
-+            free(exp->arg_values);
-+        }
-         --ctx->expansion_count;
-         TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count);
-     }
-@@ -310,15 +317,15 @@ static int return_token(int token, YYSTYPE *lval, const char *text)
- static const struct preproc_text *find_arg_expansion(struct preproc_ctx *ctx, const char *s)
- {
--    struct preproc_macro *macro;
-+    struct preproc_expansion *exp;
-     unsigned int i;
--    if ((macro = preproc_get_top_macro(ctx)))
-+    if ((exp = preproc_get_top_expansion(ctx)) && exp->macro)
-     {
--        for (i = 0; i < macro->arg_count; ++i)
-+        for (i = 0; i < exp->macro->arg_count; ++i)
-         {
--            if (!strcmp(s, macro->arg_names[i]))
--                return &macro->arg_values[i];
-+            if (!strcmp(s, exp->macro->arg_names[i]))
-+                return &exp->arg_values[i];
-         }
-     }
-     return NULL;
-@@ -330,7 +337,7 @@ static void preproc_text_add(struct preproc_text *text, const char *string)
- }
- static bool preproc_push_expansion(struct preproc_ctx *ctx,
--        const struct preproc_text *text, struct preproc_macro *macro)
-+        const struct preproc_text *text, struct preproc_macro *macro, struct preproc_text *arg_values)
- {
-     struct preproc_expansion *exp;
-@@ -342,6 +349,7 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx,
-     exp->buffer.lexer_buffer = yy_scan_bytes(text->text.buffer, text->text.content_size, ctx->scanner);
-     exp->buffer.location = text->location;
-     exp->macro = macro;
-+    exp->arg_values = arg_values;
-     TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count);
-     return true;
- }
-@@ -542,7 +550,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
-                     if ((expansion = find_arg_expansion(ctx, text)))
-                     {
--                        preproc_push_expansion(ctx, expansion, NULL);
-+                        preproc_push_expansion(ctx, expansion, NULL, NULL);
-                         continue;
-                     }
-@@ -550,7 +558,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
-                     {
-                         if (!macro->arg_count)
-                         {
--                            preproc_push_expansion(ctx, &macro->body, macro);
-+                            preproc_push_expansion(ctx, &macro->body, macro, NULL);
-                         }
-                         else
-                         {
-@@ -616,16 +624,19 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
-             case STATE_IDENTIFIER:
-                 if (token == '(')
-                 {
--                    struct preproc_text *first_arg = &func_state->macro->arg_values[0];
--                    unsigned int i;
-+                    struct preproc_text *arg_values;
-+                    if (!(arg_values = calloc(func_state->macro->arg_count, sizeof(*arg_values))))
-+                        return 0;
-+                    for (unsigned int i = 0; i < func_state->macro->arg_count; ++i)
-+                        vkd3d_string_buffer_init(&arg_values[i].text);
-+                    arg_values[0].location = *lloc;
-                     func_state->arg_count = 0;
-                     func_state->paren_depth = 1;
-                     func_state->state = STATE_ARGS;
--                    for (i = 0; i < func_state->macro->arg_count; ++i)
--                        func_state->macro->arg_values[i].text.content_size = 0;
--                    first_arg->location = *lloc;
-+                    func_state->arg_values = arg_values;
-                 }
-                 else
-                 {
-@@ -649,7 +660,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
-                 VKD3D_ASSERT(func_state->macro->arg_count);
-                 if (func_state->arg_count < func_state->macro->arg_count)
--                    current_arg = &func_state->macro->arg_values[func_state->arg_count];
-+                    current_arg = &func_state->arg_values[func_state->arg_count];
-                 switch (token)
-                 {
-@@ -664,7 +675,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
-                         if ((expansion = find_arg_expansion(ctx, text)))
-                         {
--                            preproc_push_expansion(ctx, expansion, NULL);
-+                            preproc_push_expansion(ctx, expansion, NULL, NULL);
-                             continue;
-                         }
-@@ -700,7 +711,8 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner)
-                         {
-                             if (++func_state->arg_count == func_state->macro->arg_count)
-                             {
--                                preproc_push_expansion(ctx, &func_state->macro->body, func_state->macro);
-+                                preproc_push_expansion(ctx, &func_state->macro->body,
-+                                        func_state->macro, func_state->arg_values);
-                             }
-                             else
-                             {
-diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y
-index 366e351e3b5..c6be17bd230 100644
---- a/libs/vkd3d/libs/vkd3d-shader/preproc.y
-+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y
-@@ -91,7 +91,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati
-         size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body)
- {
-     struct preproc_macro *macro;
--    unsigned int i;
-     int ret;
-     if ((macro = preproc_find_macro(ctx, name)))
-@@ -108,14 +107,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati
-     macro->name = name;
-     macro->arg_names = arg_names;
-     macro->arg_count = arg_count;
--    macro->arg_values = NULL;
--    if (arg_count && !(macro->arg_values = vkd3d_calloc(arg_count, sizeof(*macro->arg_values))))
--    {
--        vkd3d_free(macro);
--        return false;
--    }
--    for (i = 0; i < arg_count; ++i)
--        vkd3d_string_buffer_init(&macro->arg_values[i].text);
-     macro->body.text = *body;
-     macro->body.location = *body_loc;
-     ret = rb_put(&ctx->macros, name, &macro->entry);
-@@ -129,12 +120,8 @@ void preproc_free_macro(struct preproc_macro *macro)
-     vkd3d_free(macro->name);
-     for (i = 0; i < macro->arg_count; ++i)
--    {
--        vkd3d_string_buffer_cleanup(&macro->arg_values[i].text);
-         vkd3d_free(macro->arg_names[i]);
--    }
-     vkd3d_free(macro->arg_names);
--    vkd3d_free(macro->arg_values);
-     vkd3d_string_buffer_cleanup(&macro->body.text);
-     vkd3d_free(macro);
- }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
-index 49979ab2491..81555e702ec 100644
---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
-@@ -97,15 +97,37 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co
-     if (!(spvret = spvBinaryToText(context, spirv->code, spirv->size / sizeof(uint32_t),
-             get_binary_to_text_options(formatting), &text, &diagnostic)))
-     {
--        void *code = vkd3d_malloc(text->length);
--        if (code)
-+        const char *p, *q, *end, *pad, *truncate;
-+        struct vkd3d_string_buffer buffer;
-+        size_t line_len;
-+        vkd3d_string_buffer_init(&buffer);
-+        for (p = text->str, end = p + text->length; p < end; p = q)
-         {
--            memcpy(code, text->str, text->length);
--            out->size = text->length;
--            out->code = code;
-+            if (!(q = memchr(p, '\n', end - p)))
-+                q = end;
-+            else
-+                ++q;
-+            /* FIXME: Note that when colour output is enabled, we count colour
-+             * escape codes towards the line length. It's possible to fix
-+             * that, but not completely trivial. */
-+            for (pad = "", line_len = 100; q - p > line_len; line_len = 100 - strlen(pad))
-+            {
-+                if (!(truncate = memchr(p + line_len, ' ', q - p - line_len)))
-+                    break;
-+                vkd3d_string_buffer_printf(&buffer, "%s%.*s\n", pad, (int)(truncate - p), p);
-+                p = truncate + 1;
-+                if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT)
-+                    pad = "                       ";
-+                else
-+                    pad = "        ";
-+            }
-+            vkd3d_string_buffer_printf(&buffer, "%s%.*s", pad, (int)(q - p), p);
-         }
--        else
--            result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        vkd3d_shader_code_from_string_buffer(out, &buffer);
-     }
-     else
-     {
-@@ -228,7 +250,7 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d
- #define VKD3D_SPIRV_VERSION_1_0 0x00010000
- #define VKD3D_SPIRV_VERSION_1_3 0x00010300
- struct vkd3d_spirv_stream
-@@ -277,6 +299,16 @@ static void vkd3d_spirv_stream_free(struct vkd3d_spirv_stream *stream)
-     vkd3d_spirv_stream_clear(stream);
- }
-+static void vkd3d_shader_code_from_spirv_stream(struct vkd3d_shader_code *code, struct vkd3d_spirv_stream *stream)
-+    code->code = stream->words;
-+    code->size = stream->word_count * sizeof(*stream->words);
-+    stream->words = NULL;
-+    stream->capacity = 0;
-+    stream->word_count = 0;
- static size_t vkd3d_spirv_stream_current_location(struct vkd3d_spirv_stream *stream)
- {
-     return stream->word_count;
-@@ -362,6 +394,7 @@ struct vkd3d_spirv_builder
-     uint32_t type_bool_id;
-     uint32_t type_void_id;
-     uint32_t scope_subgroup_id;
-+    uint32_t numeric_type_ids[VKD3D_SHADER_COMPONENT_TYPE_COUNT][VKD3D_VEC4_SIZE];
-     struct vkd3d_spirv_stream debug_stream; /* debug instructions */
-     struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */
-@@ -1195,6 +1228,13 @@ static uint32_t vkd3d_spirv_build_op_constant_composite(struct vkd3d_spirv_build
-             SpvOpConstantComposite, result_type, constituents, constituent_count);
- }
-+static uint32_t vkd3d_spirv_build_op_spec_constant_composite(struct vkd3d_spirv_builder *builder,
-+        uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count)
-+    return vkd3d_spirv_build_op_trv(builder, &builder->global_stream,
-+            SpvOpSpecConstantComposite, result_type, constituents, constituent_count);
- static uint32_t vkd3d_spirv_get_op_constant_composite(struct vkd3d_spirv_builder *builder,
-         uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count)
- {
-@@ -1870,29 +1910,41 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build
- static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder,
-         enum vkd3d_shader_component_type component_type, unsigned int component_count)
- {
--    uint32_t scalar_id;
-+    uint32_t scalar_id, type_id;
-+    if (!component_count || component_count > VKD3D_VEC4_SIZE)
-+    {
-+        ERR("Invalid component count %u.\n", component_count);
-+        return 0;
-+    }
-+    if ((type_id = builder->numeric_type_ids[component_type][component_count - 1]))
-+        return type_id;
-     if (component_count == 1)
-     {
-         switch (component_type)
-         {
-             case VKD3D_SHADER_COMPONENT_VOID:
--                return vkd3d_spirv_get_op_type_void(builder);
-+                type_id = vkd3d_spirv_get_op_type_void(builder);
-                 break;
-             case VKD3D_SHADER_COMPONENT_FLOAT:
--                return vkd3d_spirv_get_op_type_float(builder, 32);
-+                type_id = vkd3d_spirv_get_op_type_float(builder, 32);
-                 break;
-             case VKD3D_SHADER_COMPONENT_INT:
-             case VKD3D_SHADER_COMPONENT_UINT:
--                return vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT);
-+                type_id = vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT);
-                 break;
-             case VKD3D_SHADER_COMPONENT_BOOL:
--                return vkd3d_spirv_get_op_type_bool(builder);
-+                type_id = vkd3d_spirv_get_op_type_bool(builder);
-                 break;
--                return vkd3d_spirv_get_op_type_float(builder, 64);
-+                type_id = vkd3d_spirv_get_op_type_float(builder, 64);
-+                break;
-             case VKD3D_SHADER_COMPONENT_UINT64:
--                return vkd3d_spirv_get_op_type_int(builder, 64, 0);
-+                type_id = vkd3d_spirv_get_op_type_int(builder, 64, 0);
-+                break;
-             default:
-                 FIXME("Unhandled component type %#x.\n", component_type);
-                 return 0;
-@@ -1902,46 +1954,21 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder,
-     {
-         VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID);
-         scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1);
--        return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count);
-+        type_id = vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count);
-     }
-+    builder->numeric_type_ids[component_type][component_count - 1] = type_id;
-+    return type_id;
- }
- static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder,
-         enum vkd3d_data_type data_type, unsigned int component_count)
- {
--    uint32_t scalar_id;
-+    enum vkd3d_shader_component_type component_type;
--    if (component_count == 1)
--    {
--        switch (data_type)
--        {
--            case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */
--            case VKD3D_DATA_FLOAT:
--            case VKD3D_DATA_SNORM:
--            case VKD3D_DATA_UNORM:
--                return vkd3d_spirv_get_op_type_float(builder, 32);
--                break;
--            case VKD3D_DATA_INT:
--            case VKD3D_DATA_UINT:
--            case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */
--                return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT);
--                break;
--            case VKD3D_DATA_DOUBLE:
--                return vkd3d_spirv_get_op_type_float(builder, 64);
--            case VKD3D_DATA_UINT64:
--                return vkd3d_spirv_get_op_type_int(builder, 64, 0);
--            case VKD3D_DATA_BOOL:
--                return vkd3d_spirv_get_op_type_bool(builder);
--            default:
--                FIXME("Unhandled data type %#x.\n", data_type);
--                return 0;
--        }
--    }
--    else
--    {
--        scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1);
--        return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count);
--    }
-+    component_type = vkd3d_component_type_from_data_type(data_type);
-+    return vkd3d_spirv_get_type_id(builder, component_type, component_count);
- }
- static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point)
-@@ -1996,9 +2023,7 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder,
- {
-     uint64_t capability_mask = builder->capability_mask;
-     struct vkd3d_spirv_stream stream;
--    uint32_t *code;
-     unsigned int i;
--    size_t size;
-     vkd3d_spirv_stream_init(&stream);
-@@ -2053,26 +2078,20 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder,
-     if (builder->invocation_count)
-         vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream,
-                 builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1);
--    vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream);
--    vkd3d_spirv_stream_append(&stream, &builder->debug_stream);
--    vkd3d_spirv_stream_append(&stream, &builder->annotation_stream);
--    vkd3d_spirv_stream_append(&stream, &builder->global_stream);
--    vkd3d_spirv_stream_append(&stream, &builder->function_stream);
--    if (!(code = vkd3d_calloc(stream.word_count, sizeof(*code))))
-+    if (!vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream)
-+            || !vkd3d_spirv_stream_append(&stream, &builder->debug_stream)
-+            || !vkd3d_spirv_stream_append(&stream, &builder->annotation_stream)
-+            || !vkd3d_spirv_stream_append(&stream, &builder->global_stream)
-+            || !vkd3d_spirv_stream_append(&stream, &builder->function_stream))
-     {
-         vkd3d_spirv_stream_free(&stream);
-         return false;
-     }
--    size = stream.word_count * sizeof(*code);
--    memcpy(code, stream.words, size);
-+    vkd3d_shader_code_from_spirv_stream(spirv, &stream);
-     vkd3d_spirv_stream_free(&stream);
--    spirv->code = code;
--    spirv->size = size;
-     return true;
- }
-@@ -2083,28 +2102,26 @@ static const struct vkd3d_spirv_resource_type
-     SpvDim dim;
-     uint32_t arrayed;
-     uint32_t ms;
-     unsigned int coordinate_component_count;
--    unsigned int offset_component_count;
-     SpvCapability capability;
-     SpvCapability uav_capability;
- }
- vkd3d_spirv_resource_type_table[] =
- {
--    {VKD3D_SHADER_RESOURCE_BUFFER,            SpvDimBuffer, 0, 0, 1, 0,
-+    {VKD3D_SHADER_RESOURCE_BUFFER,            SpvDimBuffer, 0, 0, 1,
-             SpvCapabilitySampledBuffer, SpvCapabilityImageBuffer},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_1D,        SpvDim1D,     0, 0, 1, 1,
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_1D,        SpvDim1D,     0, 0, 1,
-             SpvCapabilitySampled1D, SpvCapabilityImage1D},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS,      SpvDim2D,     0, 1, 2, 2},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_2D,        SpvDim2D,     0, 0, 2, 2},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_3D,        SpvDim3D,     0, 0, 3, 3},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE,      SpvDimCube,   0, 0, 3, 0},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY,   SpvDim1D,     1, 0, 2, 1,
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS,      SpvDim2D,     0, 1, 2},
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_2D,        SpvDim2D,     0, 0, 2},
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_3D,        SpvDim3D,     0, 0, 3},
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE,      SpvDimCube,   0, 0, 3},
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY,   SpvDim1D,     1, 0, 2,
-             SpvCapabilitySampled1D, SpvCapabilityImage1D},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY,   SpvDim2D,     1, 0, 3, 2},
--    {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D,     1, 1, 3, 2},
-+    {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY,   SpvDim2D,     1, 0, 3},
-             SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray},
- };
-@@ -2647,8 +2664,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p
-     if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO)))
-     {
-         compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO);
--        compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count
--                && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY;
-         compiler->shader_interface = *shader_interface;
-         if (shader_interface->push_constant_buffer_count)
-@@ -2675,6 +2690,11 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p
-         }
-     }
-+    if (compiler->shader_type == VKD3D_SHADER_TYPE_VERTEX)
-+        compiler->emit_point_size = true;
-+    else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY)
-+        compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count;
-     compiler->scan_descriptor_info = scan_descriptor_info;
-     compiler->phase = VKD3DSIH_INVALID;
-@@ -3174,6 +3194,14 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s
-         case VKD3DSPR_CONSTBUFFER:
-             snprintf(buffer, buffer_size, "cb%u_%u", reg->idx[0].offset, reg->idx[1].offset);
-             break;
-+        case VKD3DSPR_RASTOUT:
-+            if (idx == VSIR_RASTOUT_POINT_SIZE)
-+            {
-+                snprintf(buffer, buffer_size, "oPts");
-+                break;
-+            }
-+            FIXME("Unhandled rastout register %#x.\n", idx);
-+            return false;
-         case VKD3DSPR_INPUT:
-             snprintf(buffer, buffer_size, "v%u", idx);
-             break;
-@@ -3234,6 +3262,9 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s
-             snprintf(buffer, buffer_size, "vWaveLaneIndex");
-             break;
-+        case VKD3DSPR_POINT_COORD:
-+            snprintf(buffer, buffer_size, "vPointCoord");
-+            break;
-         default:
-             FIXME("Unhandled register %#x.\n", reg->type);
-             snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type);
-@@ -3252,18 +3283,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder *
-         vkd3d_spirv_build_op_name(builder, id, "%s", debug_name);
- }
--static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler,
--        struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class,
--        enum vkd3d_shader_component_type component_type, unsigned int component_count)
--    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
--    uint32_t type_id, ptr_type_id;
--    type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
--    ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id);
--    return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0);
- static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler,
-         struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class,
-         enum vkd3d_shader_component_type component_type, unsigned int component_count,
-@@ -3273,10 +3292,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil
-     uint32_t type_id, length_id, ptr_type_id;
-     unsigned int i;
--    if (!length_count)
--        return spirv_compiler_emit_variable(compiler,
--                stream, storage_class, component_type, component_count);
-     type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
-     for (i = 0; i < length_count; ++i)
-     {
-@@ -3290,6 +3305,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil
-     return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0);
- }
-+static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler,
-+        struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class,
-+        enum vkd3d_shader_component_type component_type, unsigned int component_count)
-+    return spirv_compiler_emit_array_variable(compiler, stream, storage_class,
-+            component_type, component_count, NULL, 0);
- static const struct vkd3d_spec_constant_info
- {
-     enum vkd3d_shader_parameter_name name;
-@@ -3316,8 +3339,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_
-     return NULL;
- }
--static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *compiler)
-+static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *compiler, unsigned int count)
- {
-+    uint32_t ret;
-     if (!compiler->current_spec_constant_id)
-     {
-         unsigned int i, id = 0;
-@@ -3327,28 +3352,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com
-             const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i];
--                id = max(current->u.specialization_constant.id + 1, id);
-+            {
-+                switch (current->data_type)
-+                {
-+                    case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4:
-+                        id = max(current->u.specialization_constant.id + 4, id);
-+                        break;
-+                    default:
-+                        id = max(current->u.specialization_constant.id + 1, id);
-+                        break;
-+                }
-+            }
-         }
-         compiler->current_spec_constant_id = id;
-     }
--    return compiler->current_spec_constant_id++;
-+    ret = compiler->current_spec_constant_id;
-+    compiler->current_spec_constant_id += count;
-+    return ret;
- }
- static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler,
--        enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type)
-+        enum vkd3d_shader_parameter_name name, uint32_t spec_id,
-+        enum vkd3d_data_type type, unsigned int component_count)
- {
-+    uint32_t scalar_type_id, vector_type_id, id, default_value, components[4];
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     const struct vkd3d_spec_constant_info *info;
--    uint32_t type_id, id, default_value;
-     info = get_spec_constant_info(name);
-     default_value = info ? info->default_value : 0;
--    type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1);
--    id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value);
--    vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id);
-+    scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1);
-+    vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count);
-+    for (unsigned int i = 0; i < component_count; ++i)
-+    {
-+        components[i] = vkd3d_spirv_build_op_spec_constant(builder, scalar_type_id, default_value);
-+        vkd3d_spirv_build_op_decorate1(builder, components[i], SpvDecorationSpecId, spec_id + i);
-+    }
-+    if (component_count == 1)
-+        id = components[0];
-+    else
-+        id = vkd3d_spirv_build_op_spec_constant_composite(builder, vector_type_id, components, component_count);
-     if (info)
-         vkd3d_spirv_build_op_name(builder, id, "%s", info->debug_name);
-@@ -3365,7 +3414,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile
- }
- static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler,
--        enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type)
-+        enum vkd3d_shader_parameter_name name, uint32_t spec_id,
-+        enum vkd3d_data_type type, unsigned int component_count)
- {
-     unsigned int i;
-@@ -3375,17 +3425,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler
-             return compiler->spec_constants[i].id;
-     }
--    return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type);
-+    return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type, component_count);
- }
- static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type)
-+        const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type, unsigned int component_count)
- {
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     unsigned int index = parameter - compiler->program->parameters;
-     uint32_t type_id, ptr_id, ptr_type_id;
--    type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1);
-+    type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count);
-     ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id);
-     ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id,
-             compiler->spirv_parameter_info[index].buffer_id,
-@@ -3393,48 +3443,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi
-     return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone);
- }
-+static const struct
-+    enum vkd3d_data_type type;
-+    unsigned int component_count;
-+parameter_data_type_map[] =
- static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler,
--        enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type)
-+        enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type, unsigned int component_count)
- {
-     const struct vkd3d_shader_parameter1 *parameter;
--    static const struct
--    {
--        enum vkd3d_data_type type;
--    }
--    type_map[] =
--    {
--    };
-     if (!(parameter = vsir_program_get_parameter(compiler->program, name)))
-     {
-         WARN("Unresolved shader parameter %#x.\n", name);
-         goto default_parameter;
-     }
--    if (type_map[parameter->data_type].type != type)
--        ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type);
-+    if (parameter_data_type_map[parameter->data_type].type != type
-+            || parameter_data_type_map[parameter->data_type].component_count != component_count)
-+        ERR("Expected type %#x, count %u for parameter %#x, got %#x.\n",
-+                type, component_count, name, parameter->data_type);
--    {
--        if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32)
--            return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32);
--        else
--            return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32);
--    }
-+        return spirv_compiler_get_constant(compiler, vkd3d_component_type_from_data_type(type),
-+                component_count, (const uint32_t *)&parameter->u.immediate_constant);
--        return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type);
-+        return spirv_compiler_get_spec_constant(compiler, name,
-+                parameter->u.specialization_constant.id, type, component_count);
-     if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER)
--        return spirv_compiler_get_buffer_parameter(compiler, parameter, type);
-+        return spirv_compiler_get_buffer_parameter(compiler, parameter, type, component_count);
-     FIXME("Unhandled parameter type %#x.\n", parameter->type);
- default_parameter:
-     return spirv_compiler_get_spec_constant(compiler,
--            name, spirv_compiler_alloc_spec_constant_id(compiler), type);
-+            name, spirv_compiler_alloc_spec_constant_id(compiler, component_count), type, component_count);
- }
- static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler,
-@@ -4210,7 +4261,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler,
-     else if (reg->type == VKD3DSPR_UNDEF)
-         return spirv_compiler_emit_load_undef(compiler, reg, write_mask);
-     else if (reg->type == VKD3DSPR_PARAMETER)
--        return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type);
-+        return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset,
-+                reg->data_type, reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1);
-     component_count = vsir_write_mask_component_count(write_mask);
-     component_type = vkd3d_component_type_from_data_type(reg->data_type);
-@@ -4500,9 +4552,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler,
- static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler,
-         const struct vkd3d_shader_dst_param *dst, uint32_t val_id)
- {
--    VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE));
--    if (dst->modifiers & VKD3DSPDM_SATURATE)
-+    uint32_t modifiers = dst->modifiers;
-+    /* It is always legitimate to ignore _pp. */
-+    if (modifiers & VKD3DSPDM_SATURATE)
-+    {
-         val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id);
-+        modifiers &= ~VKD3DSPDM_SATURATE;
-+    }
-+    if (dst->modifiers & VKD3DSPDM_MSAMPCENTROID)
-+    {
-+        FIXME("Ignoring _centroid modifier.\n");
-+        modifiers &= ~VKD3DSPDM_MSAMPCENTROID;
-+    }
-+    VKD3D_ASSERT(!modifiers);
-     spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, val_id);
- }
-@@ -4809,6 +4876,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin =
- {
-     VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup,
- };
-+static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin =
-+    VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize,
- static const struct
- {
-     enum vkd3d_shader_register_type reg_type;
-@@ -4828,6 +4899,8 @@ vkd3d_register_builtins[] =
-     {VKD3DSPR_TESSCOORD,        {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}},
-     {VKD3DSPR_COVERAGE,         {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
-     {VKD3DSPR_SAMPLEMASK,       {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}},
-@@ -5398,7 +5471,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler,
-     VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr);
-     VKD3D_ASSERT(reg->idx_count < 2);
--    if (!(builtin = get_spirv_builtin_for_register(reg->type)))
-+    if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE)
-+    {
-+        builtin = &vkd3d_output_point_size_builtin;
-+    }
-+    else if (!(builtin = get_spirv_builtin_for_register(reg->type)))
-     {
-         FIXME("Unhandled register %#x.\n", reg->type);
-         return;
-@@ -5451,7 +5528,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
-     const struct shader_signature *shader_signature;
-     const struct vkd3d_spirv_builtin *builtin;
-     enum vkd3d_shader_sysval_semantic sysval;
--    uint32_t write_mask, reg_write_mask;
-+    uint32_t write_mask;
-     bool use_private_variable = false;
-     struct vkd3d_symbol reg_symbol;
-     SpvStorageClass storage_class;
-@@ -5502,7 +5579,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
-         use_private_variable = true;
-     }
--    reg_write_mask = write_mask >> component_idx;
-     vkd3d_symbol_make_io(&reg_symbol, reg_type, element_idx);
-     if (rb_get(&compiler->symbol_table, &reg_symbol))
-@@ -5580,7 +5656,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
-     vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
-             use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type,
--            use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask);
-+            use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask);
-     reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1];
-     VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]);
-@@ -5591,7 +5667,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler,
-     if (use_private_variable)
-     {
-         compiler->private_output_variable[element_idx] = var_id;
--        compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask;
-+        compiler->private_output_variable_write_mask[element_idx] |= write_mask >> component_idx;
-         if (!compiler->epilogue_function_id)
-             compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder);
-     }
-@@ -5846,11 +5922,8 @@ static size_t spirv_compiler_get_current_function_location(struct spirv_compiler
-     return builder->main_function_location;
- }
--static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_instruction *instruction)
-+static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, enum vsir_global_flags flags)
- {
--    enum vkd3d_shader_global_flags flags = instruction->declaration.global_flags;
-     {
-         spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0);
-@@ -6120,12 +6193,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler,
- static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler,
-         SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg,
-         const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type,
--        bool is_uav, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info)
-+        const struct vkd3d_shader_descriptor_info1 *descriptor, bool is_uav_counter,
-+        struct vkd3d_descriptor_variable_info *var_info)
- {
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     struct vkd3d_descriptor_binding_address binding_address;
-     struct vkd3d_shader_descriptor_binding binding;
--    const struct vkd3d_shader_descriptor_info1 *d;
-     uint32_t array_type_id, ptr_type_id, var_id;
-     bool write_only = false, coherent = false;
-     struct vkd3d_symbol symbol;
-@@ -6135,12 +6208,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *
-             resource_type, is_uav_counter, &binding_address);
-     var_info->binding_base_idx = binding_address.binding_base_idx;
--    if (is_uav)
-+    if (descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && !is_uav_counter)
-     {
--        d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range);
--        write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ);
-+        write_only = !(descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ);
-         /* ROVs are implicitly globally coherent. */
-+        coherent = descriptor->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW);
-     }
-     if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u
-@@ -6194,11 +6266,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *
- }
- static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes)
-+        const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor)
- {
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     uint32_t vec4_id, array_type_id, length_id, struct_id, var_id;
-     const SpvStorageClass storage_class = SpvStorageClassUniform;
-+    unsigned int size_in_bytes = descriptor->buffer_size;
-     struct vkd3d_push_constant_buffer_binding *push_cb;
-     struct vkd3d_descriptor_variable_info var_info;
-     struct vkd3d_shader_register reg;
-@@ -6206,7 +6279,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler,
-     unsigned int size;
-     vsir_register_init(&reg, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3);
--    reg.idx[0].offset = register_id;
-+    reg.idx[0].offset = descriptor->register_id;
-     reg.idx[1].offset = range->first;
-     reg.idx[2].offset = range->last;
-@@ -6239,7 +6312,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler,
-     vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size);
-     var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id,
--            &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info);
-+            &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, descriptor, false, &var_info);
-     vkd3d_symbol_make_register(&reg_symbol, &reg);
-     vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
-@@ -6275,7 +6348,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi
- }
- static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_register_range *range, unsigned int register_id)
-+        const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor)
- {
-     const SpvStorageClass storage_class = SpvStorageClassUniformConstant;
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-@@ -6285,7 +6358,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi
-     uint32_t type_id, var_id;
-     vsir_register_init(&reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1);
--    reg.idx[0].offset = register_id;
-+    reg.idx[0].offset = descriptor->register_id;
-     vkd3d_symbol_make_sampler(&reg_symbol, &reg);
-     reg_symbol.info.sampler.range = *range;
-@@ -6295,8 +6368,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi
-         return;
-     type_id = vkd3d_spirv_get_op_type_sampler(builder);
--    var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg,
--            range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info);
-+    var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id,
-+            &reg, range, VKD3D_SHADER_RESOURCE_NONE, descriptor, false, &var_info);
-     vkd3d_symbol_make_register(&reg_symbol, &reg);
-     vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
-@@ -6346,7 +6419,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty
- static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler,
-         const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range,
-         const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type,
--        bool raw_structured, uint32_t depth)
-+        bool raw_structured)
- {
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     const struct vkd3d_shader_descriptor_info1 *d;
-@@ -6369,7 +6442,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler
-     sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1);
-     return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim,
--            depth, resource_type_info->arrayed, resource_type_info->ms,
-+            2, resource_type_info->arrayed, resource_type_info->ms,
-             reg->type == VKD3DSPR_UAV ? 2 : 1, format);
- }
-@@ -6384,18 +6457,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi
-     const struct vkd3d_shader_combined_resource_sampler *current;
-     uint32_t image_type_id, type_id, ptr_type_id, var_id;
-     enum vkd3d_shader_binding_flag resource_type_flag;
--    const struct vkd3d_shader_descriptor_info1 *d;
-     struct vkd3d_symbol symbol;
-     unsigned int i;
--    bool depth;
-     resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER
-     for (i = 0; i < shader_interface->combined_sampler_count; ++i)
-     {
--        struct vkd3d_shader_register_range sampler_range;
-         current = &shader_interface->combined_samplers[i];
-         if (current->resource_space != resource_range->space || current->resource_index != resource_range->first)
-@@ -6417,16 +6486,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi
-                     current->sampler_space, current->binding.count);
-         }
--        sampler_range.space = current->sampler_space;
--        sampler_range.first = current->sampler_index;
--        sampler_range.last = current->sampler_index;
--        d = spirv_compiler_get_descriptor_info(compiler,
--                VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler_range);
--        depth = current->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX
-         image_type_id = spirv_compiler_get_image_type_id(compiler, resource, resource_range,
--                resource_type_info, sampled_type, structure_stride || raw, depth);
-+                resource_type_info, sampled_type, structure_stride || raw);
-         type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image_type_id);
-         ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id);
-@@ -6461,21 +6522,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi
- }
- static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_register_range *range, unsigned int register_id,
--        unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type,
--        enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw)
-+        const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor)
- {
-+    bool raw = descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER;
-+    enum vkd3d_shader_resource_type resource_type = descriptor->resource_type;
-     struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0};
-+    bool is_uav = descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV;
-+    unsigned int structure_stride = descriptor->structure_stride / 4;
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     SpvStorageClass storage_class = SpvStorageClassUniformConstant;
-     uint32_t counter_type_id, type_id, var_id, counter_var_id = 0;
-     const struct vkd3d_spirv_resource_type *resource_type_info;
-+    unsigned int sample_count = descriptor->sample_count;
-     enum vkd3d_shader_component_type sampled_type;
-     struct vkd3d_symbol resource_symbol;
-     struct vkd3d_shader_register reg;
-     vsir_register_init(&reg, is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_FLOAT, 1);
--    reg.idx[0].offset = register_id;
-+    reg.idx[0].offset = descriptor->register_id;
-     if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1)
-         resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
-@@ -6489,7 +6553,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp
-         return;
-     }
--    sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type);
-+    sampled_type = vkd3d_component_type_from_resource_data_type(descriptor->resource_data_type);
-     if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range))
-     {
-@@ -6517,19 +6581,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp
-     else
-     {
-         type_id = spirv_compiler_get_image_type_id(compiler, &reg, range,
--                resource_type_info, sampled_type, structure_stride || raw, 0);
-+                resource_type_info, sampled_type, structure_stride || raw);
-     }
--    var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg,
--            range, resource_type, is_uav, false, &var_info);
-+    var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class,
-+            type_id, &reg, range, resource_type, descriptor, false, &var_info);
-     if (is_uav)
-     {
--        const struct vkd3d_shader_descriptor_info1 *d;
--        d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range);
--        if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW)
-+        if (descriptor->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW)
-         {
-             if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL)
-                 spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE,
-@@ -6543,7 +6603,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp
-                 compiler->use_invocation_interlock = true;
-         }
-+        if (descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER)
-         {
-             VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */
-@@ -6571,7 +6631,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp
-             }
-             counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class,
--                    type_id, &reg, range, resource_type, false, true, &counter_var_info);
-+                    type_id, &reg, range, resource_type, descriptor, true, &counter_var_info);
-         }
-     }
-@@ -6709,7 +6769,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi
- static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler)
- {
--    static const struct vkd3d_spirv_builtin point_size = {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize};
-+    if (compiler->program->has_point_size)
-+        return;
-     /* Set the point size. Point sprites are not supported in d3d10+, but
-      * point primitives can still be used with e.g. stream output. Vulkan
-@@ -6723,7 +6784,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler)
-             || compiler->write_tess_geom_point_size)
-     {
-         vkd3d_spirv_build_op_store(&compiler->spirv_builder,
--                spirv_compiler_emit_builtin_variable(compiler, &point_size, SpvStorageClassOutput, 0),
-+                spirv_compiler_emit_builtin_variable(compiler,
-+                        &vkd3d_output_point_size_builtin, SpvStorageClassOutput, 0),
-                 spirv_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone);
-     }
- }
-@@ -6760,15 +6822,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler
-     compiler->spirv_builder.invocation_count = instruction->declaration.count;
- }
--static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_instruction *instruction)
-+static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler,
-+        enum vkd3d_tessellator_domain domain)
- {
--    enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain;
-     SpvExecutionMode mode;
--    if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler))
--        return;
-     switch (domain)
-     {
-@@ -6845,10 +6903,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler *
-     spirv_compiler_emit_execution_mode(compiler, mode, NULL, 0);
- }
--static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler,
--        const struct vkd3d_shader_instruction *instruction)
-+static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compiler,
-+        const struct vsir_thread_group_size *group_size)
- {
--    const struct vkd3d_shader_thread_group_size *group_size = &instruction->declaration.thread_group_size;
-     const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z};
-     spirv_compiler_emit_execution_mode(compiler,
-@@ -7391,7 +7448,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler,
-     uint32_t components[VKD3D_VEC4_SIZE];
-     if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA
--            || dst->modifiers || src->modifiers)
-+            || src->reg.type == VKD3DSPR_PARAMETER || dst->modifiers || src->modifiers)
-         goto general_implementation;
-     spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info);
-@@ -8433,11 +8490,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler,
-     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
-     uint32_t sampler_var_id, sampler_id, sampled_image_type_id;
-     const struct vkd3d_symbol *symbol = NULL;
--    bool load, sampled, depth_comparison;
-+    bool load, sampled;
-     load = !(flags & VKD3D_IMAGE_FLAG_NO_LOAD);
-     sampled = flags & VKD3D_IMAGE_FLAG_SAMPLED;
--    depth_comparison = flags & VKD3D_IMAGE_FLAG_DEPTH;
-     if (resource_reg->type == VKD3DSPR_RESOURCE)
-         symbol = spirv_compiler_find_combined_sampler(compiler, resource_reg, sampler_reg);
-@@ -8491,7 +8547,7 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler,
-     image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg,
-             &symbol->info.resource.range, image->resource_type_info,
--            image->sampled_type, image->structure_stride || image->raw, depth_comparison);
-+            image->sampled_type, image->structure_stride || image->raw);
-     if (sampled)
-     {
-@@ -8535,9 +8591,11 @@ static uint32_t spirv_compiler_emit_texel_offset(struct spirv_compiler *compiler
-         const struct vkd3d_shader_instruction *instruction,
-         const struct vkd3d_spirv_resource_type *resource_type_info)
- {
-+    unsigned int component_count = resource_type_info->coordinate_component_count - resource_type_info->arrayed;
-     const struct vkd3d_shader_texel_offset *offset = &instruction->texel_offset;
--    unsigned int component_count = resource_type_info->offset_component_count;
-     int32_t data[4] = {offset->u, offset->v, offset->w, 0};
-+    VKD3D_ASSERT(resource_type_info->dim != SpvDimCube);
-     return spirv_compiler_get_constant(compiler,
-             VKD3D_SHADER_COMPONENT_INT, component_count, (const uint32_t *)data);
- }
-@@ -8622,9 +8680,9 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler,
-     const struct vkd3d_shader_dst_param *dst = instruction->dst;
-     const struct vkd3d_shader_src_param *src = instruction->src;
-     const struct vkd3d_shader_src_param *resource, *sampler;
-+    unsigned int image_operand_count = 0, component_count;
-     uint32_t sampled_type_id, coordinate_id, val_id;
-     SpvImageOperandsMask operands_mask = 0;
--    unsigned int image_operand_count = 0;
-     struct vkd3d_shader_image image;
-     uint32_t image_operands[3];
-     uint32_t coordinate_mask;
-@@ -8649,7 +8707,8 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler,
-         case VKD3DSIH_SAMPLE_GRAD:
-             op = SpvOpImageSampleExplicitLod;
-             operands_mask |= SpvImageOperandsGradMask;
--            coordinate_mask = (1u << image.resource_type_info->offset_component_count) - 1;
-+            component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed;
-+            coordinate_mask = (1u << component_count) - 1;
-             image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler,
-                     &src[3], coordinate_mask);
-             image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler,
-@@ -8738,10 +8797,10 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler,
-     const struct vkd3d_shader_dst_param *dst = instruction->dst;
-     const struct vkd3d_shader_src_param *src = instruction->src;
-     unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED;
-+    unsigned int component_count, component_idx;
-     SpvImageOperandsMask operands_mask = 0;
-     unsigned int image_operand_count = 0;
-     struct vkd3d_shader_image image;
--    unsigned int component_idx;
-     uint32_t image_operands[1];
-     uint32_t coordinate_mask;
-     bool extended_offset;
-@@ -8763,10 +8822,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler,
-     if (offset)
-     {
-+        component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed;
-+        VKD3D_ASSERT(image.resource_type_info->dim != SpvDimCube);
-         vkd3d_spirv_enable_capability(builder, SpvCapabilityImageGatherExtended);
-         operands_mask |= SpvImageOperandsOffsetMask;
-         image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler,
--                offset, (1u << image.resource_type_info->offset_component_count) - 1);
-+                offset, (1u << component_count) - 1);
-     }
-     else if (vkd3d_shader_instruction_has_texel_offset(instruction))
-     {
-@@ -8842,15 +8903,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler
-     uint32_t base_coordinate_id, component_idx;
-     uint32_t constituents[VKD3D_VEC4_SIZE];
-     struct vkd3d_shader_image image;
-+    bool storage_buffer_uav = false;
-     uint32_t indices[2];
-     unsigned int i, j;
-     SpvOp op;
-     resource = &src[instruction->src_count - 1];
--    resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg);
--    if (resource->reg.type == VKD3DSPR_UAV
--            && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource))
-+    if (resource->reg.type == VKD3DSPR_UAV)
-+    {
-+        resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg);
-+        storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource);
-+    }
-+    if (storage_buffer_uav)
-     {
-         texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1);
-         ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id);
-@@ -9569,7 +9635,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co
-     if (src->reg.type == VKD3DSPR_RASTERIZER)
-     {
-         val_id = spirv_compiler_emit_shader_parameter(compiler,
-     }
-     else
-     {
-@@ -10132,9 +10198,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
-     switch (instruction->opcode)
-     {
--            spirv_compiler_emit_dcl_global_flags(compiler, instruction);
--            break;
-             spirv_compiler_emit_dcl_indexable_temp(compiler, instruction);
-             break;
-@@ -10172,9 +10235,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
-             spirv_compiler_emit_output_vertex_count(compiler, instruction);
-             break;
--            spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction);
--            break;
-             spirv_compiler_emit_tessellator_output_primitive(compiler,
-                     instruction->declaration.tessellator_output_primitive);
-@@ -10183,9 +10243,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
-             spirv_compiler_emit_tessellator_partitioning(compiler,
-                     instruction->declaration.tessellator_partitioning);
-             break;
--            spirv_compiler_emit_dcl_thread_group(compiler, instruction);
--            break;
-         case VKD3DSIH_HS_FORK_PHASE:
-         case VKD3DSIH_HS_JOIN_PHASE:
-@@ -10506,7 +10563,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler,
-         case VKD3DSIH_DCL_UAV_RAW:
--        case VKD3DSIH_DCL_UAV_TYPED:
-         case VKD3DSIH_HS_DECLS:
-         case VKD3DSIH_NOP:
-             /* nothing to do */
-@@ -10543,6 +10599,23 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler)
-         else
-             spirv_compiler_emit_input(compiler, VKD3DSPR_PATCHCONST, i);
-     }
-+    if (compiler->program->has_point_size)
-+    {
-+        struct vkd3d_shader_dst_param dst;
-+        vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
-+        dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
-+        spirv_compiler_emit_output_register(compiler, &dst);
-+    }
-+    if (compiler->program->has_point_coord)
-+    {
-+        struct vkd3d_shader_dst_param dst;
-+        vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0);
-+        spirv_compiler_emit_input_register(compiler, &dst);
-+    }
- }
- static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler)
-@@ -10564,23 +10637,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c
-         switch (descriptor->type)
-         {
--                spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id);
-+                spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor);
-                 break;
--                spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size);
-+                spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor);
-                 break;
--                spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id,
--                        descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type,
--                        descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER);
--                break;
--                spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id,
--                        descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type,
--                        descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER);
-+                spirv_compiler_emit_resource_declaration(compiler, &range, descriptor);
-                 break;
-             default:
-@@ -10600,10 +10666,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
-     enum vkd3d_result result = VKD3D_OK;
-     unsigned int i, max_element_count;
--    if ((result = vsir_program_normalise(program, compiler->config_flags,
-+    if ((result = vsir_program_transform(program, compiler->config_flags,
-             compile_info, compiler->message_context)) < 0)
-         return result;
-+    VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
-     max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count);
-     if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info))))
-         return VKD3D_ERROR_OUT_OF_MEMORY;
-@@ -10612,6 +10680,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
-         spirv_compiler_emit_temps(compiler, program->temp_count);
-     if (program->ssa_count)
-         spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count);
-+    if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE)
-+        spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size);
-+    spirv_compiler_emit_global_flags(compiler, program->global_flags);
-     spirv_compiler_emit_descriptor_declarations(compiler);
-@@ -10624,7 +10695,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
-         {
-             uint32_t type_id, struct_id, ptr_type_id, var_id;
--            type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
-+            type_id = vkd3d_spirv_get_type_id(builder,
-+                    vkd3d_component_type_from_data_type(parameter_data_type_map[parameter->data_type].type),
-+                    parameter_data_type_map[parameter->data_type].component_count);
-             struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1);
-             vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0);
-@@ -10663,6 +10736,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
-     compiler->input_control_point_count = program->input_control_point_count;
-     compiler->output_control_point_count = program->output_control_point_count;
-+    if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler))
-+        spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain);
-     if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL)
-         spirv_compiler_emit_shader_signature_outputs(compiler);
-diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
-index 84f641cc316..fcfe074e61e 100644
---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
-@@ -23,6 +23,7 @@
- #include "hlsl.h"
- #include "vkd3d_shader_private.h"
-+#include "d3dcommon.h"
- #define SM4_MAX_SRC_COUNT 6
- #define SM4_MAX_DST_COUNT 2
-@@ -616,6 +617,47 @@ enum vkd3d_sm4_shader_data_type
-     VKD3D_SM4_SHADER_DATA_MESSAGE                   = 0x4,
- };
-+enum vkd3d_sm4_stat_field
-+    VKD3D_STAT_UNUSED = 0,
-+struct vkd3d_sm4_stat_field_info
-+    enum vkd3d_sm4_opcode opcode;
-+    enum vkd3d_sm4_stat_field field;
- struct sm4_index_range
- {
-     unsigned int index;
-@@ -632,8 +674,10 @@ struct sm4_index_range_array
- struct vkd3d_sm4_lookup_tables
- {
-     const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT];
-+    const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT];
-     const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT];
-     const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT];
-+    const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT];
- };
- struct vkd3d_shader_sm4_parser
-@@ -853,7 +897,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u
-     }
--    reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV;
-+    reg_data_type = VKD3D_DATA_UNUSED;
-     shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg);
-     shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range);
-@@ -873,7 +917,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u
-         }
-     }
--    if (reg_data_type == VKD3D_DATA_UAV)
-+    if (opcode != VKD3D_SM4_OP_DCL_RESOURCE)
-         ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT;
-     shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space);
-@@ -915,7 +959,7 @@ static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, ui
-     ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT;
-     if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON)
-         FIXME("Unhandled sampler mode %#x.\n", ins->flags);
--    shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src);
-+    shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &ins->declaration.sampler.src);
-     shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range);
-     shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space);
- }
-@@ -1115,7 +1159,18 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u
-         struct signature_element *e = vsir_signature_find_element_for_reg(
-                 &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
--        e->interpolation_mode = ins->flags;
-+        if (!e)
-+        {
-+            WARN("No matching signature element for input register %u with mask %#x.\n",
-+                    dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
-+            vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL,
-+                    "No matching signature element for input register %u with mask %#x.\n",
-+                    dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
-+        }
-+        else
-+        {
-+            e->interpolation_mode = ins->flags;
-+        }
-     }
- }
-@@ -1130,7 +1185,18 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in
-         struct signature_element *e = vsir_signature_find_element_for_reg(
-                 &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
--        e->interpolation_mode = ins->flags;
-+        if (!e)
-+        {
-+            WARN("No matching signature element for input register %u with mask %#x.\n",
-+                    dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
-+            vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL,
-+                    "No matching signature element for input register %u with mask %#x.\n",
-+                    dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask);
-+        }
-+        else
-+        {
-+            e->interpolation_mode = ins->flags;
-+        }
-     }
-     ins->declaration.register_semantic.sysval_semantic = *tokens;
- }
-@@ -1147,9 +1213,10 @@ static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *
- }
- static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode,
--        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
-+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4)
- {
-     ins->declaration.global_flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT;
-+    sm4->p.program->global_flags = ins->declaration.global_flags;
- }
- static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
-@@ -1201,6 +1268,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi
- {
-     ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
-             >> VKD3D_SM5_TESSELLATOR_SHIFT;
-+    priv->p.program->tess_domain = ins->declaration.tessellator_domain;
- }
- static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode,
-@@ -1224,11 +1292,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio
- }
- static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode,
--        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
-+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4)
- {
-+    struct vsir_program *program = sm4->p.program;
-     ins->declaration.thread_group_size.x = *tokens++;
-     ins->declaration.thread_group_size.y = *tokens++;
-     ins->declaration.thread_group_size.z = *tokens++;
-+    program->thread_group_size = ins->declaration.thread_group_size;
- }
- static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
-@@ -1237,7 +1308,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui
-     struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource;
-     const uint32_t *end = &tokens[token_count];
--    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg);
-+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg);
-     shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
-     ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT;
-     shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
-@@ -1249,7 +1320,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *
-     struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource;
-     const uint32_t *end = &tokens[token_count];
--    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg);
-+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg);
-     shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
-     ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT;
-     resource->byte_stride = *tokens++;
-@@ -1286,7 +1357,7 @@ static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct
-     struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource;
-     const uint32_t *end = &tokens[token_count];
--    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg);
-+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg);
-     shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
-     resource->byte_stride = *tokens++;
-     if (resource->byte_stride % 4)
-@@ -1300,7 +1371,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in
-     struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource;
-     const uint32_t *end = &tokens[token_count];
--    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg);
-+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg);
-     shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
-     shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
- }
-@@ -1330,11 +1401,23 @@ static const enum vkd3d_shader_register_precision register_precision_table[] =
- };
--struct tpf_writer
-+struct sm4_stat
-+    uint32_t fields[VKD3D_STAT_COUNT];
-+struct tpf_compiler
- {
-+    /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */
-     struct hlsl_ctx *ctx;
--    struct vkd3d_bytecode_buffer *buffer;
-+    struct vsir_program *program;
-     struct vkd3d_sm4_lookup_tables lookup;
-+    struct sm4_stat *stat;
-+    int result;
-+    struct vkd3d_bytecode_buffer *buffer;
-+    struct dxbc_writer dxbc;
- };
- static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-@@ -1400,8 +1483,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         {VKD3D_SM4_OP_ISHR,                             VKD3DSIH_ISHR,                             "i",    "ii"},
-         {VKD3D_SM4_OP_ITOF,                             VKD3DSIH_ITOF,                             "f",    "i"},
-         {VKD3D_SM4_OP_LABEL,                            VKD3DSIH_LABEL,                            "",     "O"},
--        {VKD3D_SM4_OP_LD,                               VKD3DSIH_LD,                               "u",    "iR"},
--        {VKD3D_SM4_OP_LD2DMS,                           VKD3DSIH_LD2DMS,                           "u",    "iRi"},
-+        {VKD3D_SM4_OP_LD,                               VKD3DSIH_LD,                               "u",    "i*"},
-+        {VKD3D_SM4_OP_LD2DMS,                           VKD3DSIH_LD2DMS,                           "u",    "i*i"},
-         {VKD3D_SM4_OP_LOG,                              VKD3DSIH_LOG,                              "f",    "f"},
-         {VKD3D_SM4_OP_LOOP,                             VKD3DSIH_LOOP,                             "",     ""},
-         {VKD3D_SM4_OP_LT,                               VKD3DSIH_LTO,                              "u",    "ff"},
-@@ -1417,7 +1500,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         {VKD3D_SM4_OP_NOP,                              VKD3DSIH_NOP,                              "",     ""},
-         {VKD3D_SM4_OP_NOT,                              VKD3DSIH_NOT,                              "u",    "u"},
-         {VKD3D_SM4_OP_OR,                               VKD3DSIH_OR,                               "u",    "uu"},
--        {VKD3D_SM4_OP_RESINFO,                          VKD3DSIH_RESINFO,                          "f",    "iR"},
-+        {VKD3D_SM4_OP_RESINFO,                          VKD3DSIH_RESINFO,                          "f",    "i*"},
-         {VKD3D_SM4_OP_RET,                              VKD3DSIH_RET,                              "",     ""},
-         {VKD3D_SM4_OP_RETC,                             VKD3DSIH_RETP,                             "",     "u",
-                 shader_sm4_read_conditional_op},
-@@ -1426,12 +1509,12 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         {VKD3D_SM4_OP_ROUND_PI,                         VKD3DSIH_ROUND_PI,                         "f",    "f"},
-         {VKD3D_SM4_OP_ROUND_Z,                          VKD3DSIH_ROUND_Z,                          "f",    "f"},
-         {VKD3D_SM4_OP_RSQ,                              VKD3DSIH_RSQ,                              "f",    "f"},
--        {VKD3D_SM4_OP_SAMPLE,                           VKD3DSIH_SAMPLE,                           "u",    "fRS"},
--        {VKD3D_SM4_OP_SAMPLE_C,                         VKD3DSIH_SAMPLE_C,                         "f",    "fRSf"},
--        {VKD3D_SM4_OP_SAMPLE_C_LZ,                      VKD3DSIH_SAMPLE_C_LZ,                      "f",    "fRSf"},
--        {VKD3D_SM4_OP_SAMPLE_LOD,                       VKD3DSIH_SAMPLE_LOD,                       "u",    "fRSf"},
--        {VKD3D_SM4_OP_SAMPLE_GRAD,                      VKD3DSIH_SAMPLE_GRAD,                      "u",    "fRSff"},
--        {VKD3D_SM4_OP_SAMPLE_B,                         VKD3DSIH_SAMPLE_B,                         "u",    "fRSf"},
-+        {VKD3D_SM4_OP_SAMPLE,                           VKD3DSIH_SAMPLE,                           "u",    "f**"},
-+        {VKD3D_SM4_OP_SAMPLE_C,                         VKD3DSIH_SAMPLE_C,                         "f",    "f**f"},
-+        {VKD3D_SM4_OP_SAMPLE_C_LZ,                      VKD3DSIH_SAMPLE_C_LZ,                      "f",    "f**f"},
-+        {VKD3D_SM4_OP_SAMPLE_LOD,                       VKD3DSIH_SAMPLE_LOD,                       "u",    "f**f"},
-+        {VKD3D_SM4_OP_SAMPLE_GRAD,                      VKD3DSIH_SAMPLE_GRAD,                      "u",    "f**ff"},
-+        {VKD3D_SM4_OP_SAMPLE_B,                         VKD3DSIH_SAMPLE_B,                         "u",    "f**f"},
-         {VKD3D_SM4_OP_SQRT,                             VKD3DSIH_SQRT,                             "f",    "f"},
-         {VKD3D_SM4_OP_SWITCH,                           VKD3DSIH_SWITCH,                           "",     "i"},
-         {VKD3D_SM4_OP_SINCOS,                           VKD3DSIH_SINCOS,                           "ff",   "f"},
-@@ -1480,10 +1563,10 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-                 shader_sm4_read_dcl_indexable_temp},
-         {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,                 VKD3DSIH_DCL_GLOBAL_FLAGS,                 "",     "",
-                 shader_sm4_read_dcl_global_flags},
--        {VKD3D_SM4_OP_LOD,                              VKD3DSIH_LOD,                              "f",    "fRS"},
--        {VKD3D_SM4_OP_GATHER4,                          VKD3DSIH_GATHER4,                          "u",    "fRS"},
--        {VKD3D_SM4_OP_SAMPLE_POS,                       VKD3DSIH_SAMPLE_POS,                       "f",    "Ru"},
--        {VKD3D_SM4_OP_SAMPLE_INFO,                      VKD3DSIH_SAMPLE_INFO,                      "f",    "R"},
-+        {VKD3D_SM4_OP_LOD,                              VKD3DSIH_LOD,                              "f",    "f**"},
-+        {VKD3D_SM4_OP_GATHER4,                          VKD3DSIH_GATHER4,                          "u",    "f**"},
-+        {VKD3D_SM4_OP_SAMPLE_POS,                       VKD3DSIH_SAMPLE_POS,                       "f",    "*u"},
-+        {VKD3D_SM4_OP_SAMPLE_INFO,                      VKD3DSIH_SAMPLE_INFO,                      "f",    "*"},
-         {VKD3D_SM5_OP_HS_DECLS,                         VKD3DSIH_HS_DECLS,                         "",     ""},
-         {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE,           VKD3DSIH_HS_CONTROL_POINT_PHASE,           "",     ""},
-         {VKD3D_SM5_OP_HS_FORK_PHASE,                    VKD3DSIH_HS_FORK_PHASE,                    "",     ""},
-@@ -1492,14 +1575,14 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         {VKD3D_SM5_OP_CUT_STREAM,                       VKD3DSIH_CUT_STREAM,                       "",     "f"},
-         {VKD3D_SM5_OP_FCALL,                            VKD3DSIH_FCALL,                            "",     "O",
-                 shader_sm5_read_fcall},
--        {VKD3D_SM5_OP_BUFINFO,                          VKD3DSIH_BUFINFO,                          "i",    "U"},
-+        {VKD3D_SM5_OP_BUFINFO,                          VKD3DSIH_BUFINFO,                          "i",    "*"},
-         {VKD3D_SM5_OP_DERIV_RTX_COARSE,                 VKD3DSIH_DSX_COARSE,                       "f",    "f"},
-         {VKD3D_SM5_OP_DERIV_RTX_FINE,                   VKD3DSIH_DSX_FINE,                         "f",    "f"},
-         {VKD3D_SM5_OP_DERIV_RTY_COARSE,                 VKD3DSIH_DSY_COARSE,                       "f",    "f"},
-         {VKD3D_SM5_OP_DERIV_RTY_FINE,                   VKD3DSIH_DSY_FINE,                         "f",    "f"},
--        {VKD3D_SM5_OP_GATHER4_C,                        VKD3DSIH_GATHER4_C,                        "f",    "fRSf"},
--        {VKD3D_SM5_OP_GATHER4_PO,                       VKD3DSIH_GATHER4_PO,                       "f",    "fiRS"},
--        {VKD3D_SM5_OP_GATHER4_PO_C,                     VKD3DSIH_GATHER4_PO_C,                     "f",    "fiRSf"},
-+        {VKD3D_SM5_OP_GATHER4_C,                        VKD3DSIH_GATHER4_C,                        "f",    "f**f"},
-+        {VKD3D_SM5_OP_GATHER4_PO,                       VKD3DSIH_GATHER4_PO,                       "f",    "fi**"},
-+        {VKD3D_SM5_OP_GATHER4_PO_C,                     VKD3DSIH_GATHER4_PO_C,                     "f",    "fi**f"},
-         {VKD3D_SM5_OP_RCP,                              VKD3DSIH_RCP,                              "f",    "f"},
-         {VKD3D_SM5_OP_F32TOF16,                         VKD3DSIH_F32TOF16,                         "u",    "f"},
-         {VKD3D_SM5_OP_F16TOF32,                         VKD3DSIH_F16TOF32,                         "f",    "u"},
-@@ -1551,33 +1634,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-                 shader_sm5_read_dcl_resource_raw},
-                 shader_sm5_read_dcl_resource_structured},
--        {VKD3D_SM5_OP_LD_UAV_TYPED,                     VKD3DSIH_LD_UAV_TYPED,                     "u",    "iU"},
--        {VKD3D_SM5_OP_STORE_UAV_TYPED,                  VKD3DSIH_STORE_UAV_TYPED,                  "U",    "iu"},
--        {VKD3D_SM5_OP_LD_RAW,                           VKD3DSIH_LD_RAW,                           "u",    "iU"},
--        {VKD3D_SM5_OP_STORE_RAW,                        VKD3DSIH_STORE_RAW,                        "U",    "uu"},
--        {VKD3D_SM5_OP_LD_STRUCTURED,                    VKD3DSIH_LD_STRUCTURED,                    "u",    "iiR"},
--        {VKD3D_SM5_OP_STORE_STRUCTURED,                 VKD3DSIH_STORE_STRUCTURED,                 "U",    "iiu"},
--        {VKD3D_SM5_OP_ATOMIC_AND,                       VKD3DSIH_ATOMIC_AND,                       "U",    "iu"},
--        {VKD3D_SM5_OP_ATOMIC_OR,                        VKD3DSIH_ATOMIC_OR,                        "U",    "iu"},
--        {VKD3D_SM5_OP_ATOMIC_XOR,                       VKD3DSIH_ATOMIC_XOR,                       "U",    "iu"},
--        {VKD3D_SM5_OP_ATOMIC_CMP_STORE,                 VKD3DSIH_ATOMIC_CMP_STORE,                 "U",    "iuu"},
--        {VKD3D_SM5_OP_ATOMIC_IADD,                      VKD3DSIH_ATOMIC_IADD,                      "U",    "ii"},
--        {VKD3D_SM5_OP_ATOMIC_IMAX,                      VKD3DSIH_ATOMIC_IMAX,                      "U",    "ii"},
--        {VKD3D_SM5_OP_ATOMIC_IMIN,                      VKD3DSIH_ATOMIC_IMIN,                      "U",    "ii"},
--        {VKD3D_SM5_OP_ATOMIC_UMAX,                      VKD3DSIH_ATOMIC_UMAX,                      "U",    "iu"},
--        {VKD3D_SM5_OP_ATOMIC_UMIN,                      VKD3DSIH_ATOMIC_UMIN,                      "U",    "iu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC,                 VKD3DSIH_IMM_ATOMIC_ALLOC,                 "u",    "U"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME,               VKD3DSIH_IMM_ATOMIC_CONSUME,               "u",    "U"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_IADD,                  VKD3DSIH_IMM_ATOMIC_IADD,                  "uU",   "ii"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_AND,                   VKD3DSIH_IMM_ATOMIC_AND,                   "uU",   "iu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_OR,                    VKD3DSIH_IMM_ATOMIC_OR,                    "uU",   "iu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_XOR,                   VKD3DSIH_IMM_ATOMIC_XOR,                   "uU",   "iu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_EXCH,                  VKD3DSIH_IMM_ATOMIC_EXCH,                  "uU",   "iu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH,              VKD3DSIH_IMM_ATOMIC_CMP_EXCH,              "uU",   "iuu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_IMAX,                  VKD3DSIH_IMM_ATOMIC_IMAX,                  "iU",   "ii"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_IMIN,                  VKD3DSIH_IMM_ATOMIC_IMIN,                  "iU",   "ii"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_UMAX,                  VKD3DSIH_IMM_ATOMIC_UMAX,                  "uU",   "iu"},
--        {VKD3D_SM5_OP_IMM_ATOMIC_UMIN,                  VKD3DSIH_IMM_ATOMIC_UMIN,                  "uU",   "iu"},
-+        {VKD3D_SM5_OP_LD_UAV_TYPED,                     VKD3DSIH_LD_UAV_TYPED,                     "u",    "i*"},
-+        {VKD3D_SM5_OP_STORE_UAV_TYPED,                  VKD3DSIH_STORE_UAV_TYPED,                  "*",    "iu"},
-+        {VKD3D_SM5_OP_LD_RAW,                           VKD3DSIH_LD_RAW,                           "u",    "i*"},
-+        {VKD3D_SM5_OP_STORE_RAW,                        VKD3DSIH_STORE_RAW,                        "*",    "uu"},
-+        {VKD3D_SM5_OP_LD_STRUCTURED,                    VKD3DSIH_LD_STRUCTURED,                    "u",    "ii*"},
-+        {VKD3D_SM5_OP_STORE_STRUCTURED,                 VKD3DSIH_STORE_STRUCTURED,                 "*",    "iiu"},
-+        {VKD3D_SM5_OP_ATOMIC_AND,                       VKD3DSIH_ATOMIC_AND,                       "*",    "iu"},
-+        {VKD3D_SM5_OP_ATOMIC_OR,                        VKD3DSIH_ATOMIC_OR,                        "*",    "iu"},
-+        {VKD3D_SM5_OP_ATOMIC_XOR,                       VKD3DSIH_ATOMIC_XOR,                       "*",    "iu"},
-+        {VKD3D_SM5_OP_ATOMIC_CMP_STORE,                 VKD3DSIH_ATOMIC_CMP_STORE,                 "*",    "iuu"},
-+        {VKD3D_SM5_OP_ATOMIC_IADD,                      VKD3DSIH_ATOMIC_IADD,                      "*",    "ii"},
-+        {VKD3D_SM5_OP_ATOMIC_IMAX,                      VKD3DSIH_ATOMIC_IMAX,                      "*",    "ii"},
-+        {VKD3D_SM5_OP_ATOMIC_IMIN,                      VKD3DSIH_ATOMIC_IMIN,                      "*",    "ii"},
-+        {VKD3D_SM5_OP_ATOMIC_UMAX,                      VKD3DSIH_ATOMIC_UMAX,                      "*",    "iu"},
-+        {VKD3D_SM5_OP_ATOMIC_UMIN,                      VKD3DSIH_ATOMIC_UMIN,                      "*",    "iu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC,                 VKD3DSIH_IMM_ATOMIC_ALLOC,                 "u",    "*"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME,               VKD3DSIH_IMM_ATOMIC_CONSUME,               "u",    "*"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_IADD,                  VKD3DSIH_IMM_ATOMIC_IADD,                  "u*",   "ii"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_AND,                   VKD3DSIH_IMM_ATOMIC_AND,                   "u*",   "iu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_OR,                    VKD3DSIH_IMM_ATOMIC_OR,                    "u*",   "iu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_XOR,                   VKD3DSIH_IMM_ATOMIC_XOR,                   "u*",   "iu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_EXCH,                  VKD3DSIH_IMM_ATOMIC_EXCH,                  "u*",   "iu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH,              VKD3DSIH_IMM_ATOMIC_CMP_EXCH,              "u*",   "iuu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_IMAX,                  VKD3DSIH_IMM_ATOMIC_IMAX,                  "i*",   "ii"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_IMIN,                  VKD3DSIH_IMM_ATOMIC_IMIN,                  "i*",   "ii"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_UMAX,                  VKD3DSIH_IMM_ATOMIC_UMAX,                  "u*",   "iu"},
-+        {VKD3D_SM5_OP_IMM_ATOMIC_UMIN,                  VKD3DSIH_IMM_ATOMIC_UMIN,                  "u*",   "iu"},
-         {VKD3D_SM5_OP_SYNC,                             VKD3DSIH_SYNC,                             "",     "",
-                 shader_sm5_read_sync},
-         {VKD3D_SM5_OP_DADD,                             VKD3DSIH_DADD,                             "d",    "dd"},
-@@ -1604,21 +1687,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         {VKD3D_SM5_OP_DTOU,                             VKD3DSIH_DTOU,                             "u",    "d"},
-         {VKD3D_SM5_OP_ITOD,                             VKD3DSIH_ITOD,                             "d",    "i"},
-         {VKD3D_SM5_OP_UTOD,                             VKD3DSIH_UTOD,                             "d",    "u"},
--        {VKD3D_SM5_OP_GATHER4_S,                        VKD3DSIH_GATHER4_S,                        "uu",   "fRS"},
--        {VKD3D_SM5_OP_GATHER4_C_S,                      VKD3DSIH_GATHER4_C_S,                      "fu",   "fRSf"},
--        {VKD3D_SM5_OP_GATHER4_PO_S,                     VKD3DSIH_GATHER4_PO_S,                     "fu",   "fiRS"},
--        {VKD3D_SM5_OP_GATHER4_PO_C_S,                   VKD3DSIH_GATHER4_PO_C_S,                   "fu",   "fiRSf"},
--        {VKD3D_SM5_OP_LD_S,                             VKD3DSIH_LD_S,                             "uu",   "iR"},
--        {VKD3D_SM5_OP_LD2DMS_S,                         VKD3DSIH_LD2DMS_S,                         "uu",   "iRi"},
-+        {VKD3D_SM5_OP_GATHER4_S,                        VKD3DSIH_GATHER4_S,                        "uu",   "f**"},
-+        {VKD3D_SM5_OP_GATHER4_C_S,                      VKD3DSIH_GATHER4_C_S,                      "fu",   "f**f"},
-+        {VKD3D_SM5_OP_GATHER4_PO_S,                     VKD3DSIH_GATHER4_PO_S,                     "fu",   "fi**"},
-+        {VKD3D_SM5_OP_GATHER4_PO_C_S,                   VKD3DSIH_GATHER4_PO_C_S,                   "fu",   "fi**f"},
-+        {VKD3D_SM5_OP_LD_S,                             VKD3DSIH_LD_S,                             "uu",   "i*"},
-+        {VKD3D_SM5_OP_LD2DMS_S,                         VKD3DSIH_LD2DMS_S,                         "uu",   "i*i"},
-         {VKD3D_SM5_OP_LD_UAV_TYPED_S,                   VKD3DSIH_LD_UAV_TYPED_S,                   "uu",   "iU"},
-         {VKD3D_SM5_OP_LD_RAW_S,                         VKD3DSIH_LD_RAW_S,                         "uu",   "iU"},
--        {VKD3D_SM5_OP_LD_STRUCTURED_S,                  VKD3DSIH_LD_STRUCTURED_S,                  "uu",   "iiR"},
--        {VKD3D_SM5_OP_SAMPLE_LOD_S,                     VKD3DSIH_SAMPLE_LOD_S,                     "uu",   "fRSf"},
--        {VKD3D_SM5_OP_SAMPLE_C_LZ_S,                    VKD3DSIH_SAMPLE_C_LZ_S,                    "fu",   "fRSf"},
--        {VKD3D_SM5_OP_SAMPLE_CL_S,                      VKD3DSIH_SAMPLE_CL_S,                      "uu",   "fRSf"},
--        {VKD3D_SM5_OP_SAMPLE_B_CL_S,                    VKD3DSIH_SAMPLE_B_CL_S,                    "uu",   "fRSff"},
--        {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S,                 VKD3DSIH_SAMPLE_GRAD_CL_S,                 "uu",   "fRSfff"},
--        {VKD3D_SM5_OP_SAMPLE_C_CL_S,                    VKD3DSIH_SAMPLE_C_CL_S,                    "fu",   "fRSff"},
-+        {VKD3D_SM5_OP_LD_STRUCTURED_S,                  VKD3DSIH_LD_STRUCTURED_S,                  "uu",   "ii*"},
-+        {VKD3D_SM5_OP_SAMPLE_LOD_S,                     VKD3DSIH_SAMPLE_LOD_S,                     "uu",   "f**f"},
-+        {VKD3D_SM5_OP_SAMPLE_C_LZ_S,                    VKD3DSIH_SAMPLE_C_LZ_S,                    "fu",   "f**f"},
-+        {VKD3D_SM5_OP_SAMPLE_CL_S,                      VKD3DSIH_SAMPLE_CL_S,                      "uu",   "f**f"},
-+        {VKD3D_SM5_OP_SAMPLE_B_CL_S,                    VKD3DSIH_SAMPLE_B_CL_S,                    "uu",   "f**ff"},
-+        {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S,                 VKD3DSIH_SAMPLE_GRAD_CL_S,                 "uu",   "f**fff"},
-+        {VKD3D_SM5_OP_SAMPLE_C_CL_S,                    VKD3DSIH_SAMPLE_C_CL_S,                    "fu",   "f**ff"},
-     };
-@@ -1637,7 +1720,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         {VKD3D_SM4_RT_PRIMID,                  VKD3DSPR_PRIMID,          VKD3D_SM4_SWIZZLE_NONE},
-         {VKD3D_SM4_RT_DEPTHOUT,                VKD3DSPR_DEPTHOUT,        VKD3D_SM4_SWIZZLE_VEC4},
-         {VKD3D_SM4_RT_NULL,                    VKD3DSPR_NULL,            VKD3D_SM4_SWIZZLE_INVALID},
-         {VKD3D_SM4_RT_OMASK,                   VKD3DSPR_SAMPLEMASK,      VKD3D_SM4_SWIZZLE_VEC4},
-         {VKD3D_SM5_RT_STREAM,                  VKD3DSPR_STREAM,          VKD3D_SM4_SWIZZLE_VEC4},
-@@ -1662,6 +1745,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-     };
-+    static const struct vkd3d_sm4_stat_field_info stat_field_table[] =
-+    {
-+        {VKD3D_SM4_OP_MOV,      VKD3D_STAT_MOV},
-+        {VKD3D_SM4_OP_MOVC,     VKD3D_STAT_MOVC},
-+        {VKD3D_SM5_OP_DMOV,     VKD3D_STAT_MOV},
-+        {VKD3D_SM5_OP_DMOVC,    VKD3D_STAT_MOVC},
-+        {VKD3D_SM4_OP_ITOF,     VKD3D_STAT_CONV},
-+        {VKD3D_SM4_OP_FTOI,     VKD3D_STAT_CONV},
-+        {VKD3D_SM4_OP_FTOU,     VKD3D_STAT_CONV},
-+        {VKD3D_SM4_OP_UTOF,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_DTOU,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_UTOD,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_DTOF,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_FTOD,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_DTOI,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_ITOD,     VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_F32TOF16, VKD3D_STAT_CONV},
-+        {VKD3D_SM5_OP_F16TOF32, VKD3D_STAT_CONV},
-+        {VKD3D_SM4_OP_ADD,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_DIV,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_DP2,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_DP3,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_DP4,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_EQ,       VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_EXP,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_FRC,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_GE,       VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_LT,       VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_MAD,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_MIN,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_MAX,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_MUL,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_NE,       VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_RSQ,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_SQRT,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_RCP,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DADD,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DMAX,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DMIN,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DMUL,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DEQ,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DGE,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DLT,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DNE,      VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DDIV,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DFMA,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM5_OP_DRCP,     VKD3D_STAT_FLOAT},
-+        {VKD3D_SM4_OP_IADD, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_IEQ,  VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_IGE,  VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_ILT,  VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_IMAD, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_IMAX, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_IMIN, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_IMUL, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_INE,  VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_INEG, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_ISHL, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_ISHR, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_ITOF, VKD3D_STAT_INT},
-+        {VKD3D_SM4_OP_ULT,  VKD3D_STAT_UINT},
-+        {VKD3D_SM4_OP_UGE,  VKD3D_STAT_UINT},
-+        {VKD3D_SM4_OP_EMIT,        VKD3D_STAT_EMIT},
-+        {VKD3D_SM4_OP_CUT,         VKD3D_STAT_CUT},
-+        {VKD3D_SM4_OP_SAMPLE,           VKD3D_STAT_SAMPLE},
-+        {VKD3D_SM4_OP_SAMPLE_C,         VKD3D_STAT_SAMPLE_C},
-+        {VKD3D_SM4_OP_SAMPLE_B,         VKD3D_STAT_SAMPLE_BIAS},
-+        {VKD3D_SM4_OP_GATHER4,          VKD3D_STAT_GATHER},
-+        {VKD3D_SM5_OP_GATHER4_PO,       VKD3D_STAT_GATHER},
-+        {VKD3D_SM4_OP_LOD,              VKD3D_STAT_LOD},
-+        {VKD3D_SM4_OP_LD,              VKD3D_STAT_LOAD},
-+        {VKD3D_SM4_OP_LD2DMS,          VKD3D_STAT_LOAD},
-+        {VKD3D_SM5_OP_LD_RAW,          VKD3D_STAT_LOAD},
-+        {VKD3D_SM5_OP_LD_S,            VKD3D_STAT_LOAD},
-+        {VKD3D_SM5_OP_LD2DMS_S,        VKD3D_STAT_LOAD},
-+        {VKD3D_SM5_OP_LD_RAW_S,        VKD3D_STAT_LOAD},
-+        {VKD3D_SM5_OP_STORE_RAW,       VKD3D_STAT_STORE},
-+        {VKD3D_SM5_OP_ATOMIC_AND,          VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_OR,           VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_XOR,          VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_IADD,         VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_IMAX,         VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_IMIN,         VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_UMAX,         VKD3D_STAT_ATOMIC},
-+        {VKD3D_SM5_OP_ATOMIC_UMIN,         VKD3D_STAT_ATOMIC},
-+    };
-     memset(lookup, 0, sizeof(*lookup));
-     for (i = 0; i < ARRAY_SIZE(opcode_table); ++i)
-@@ -1669,6 +1907,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         const struct vkd3d_sm4_opcode_info *info = &opcode_table[i];
-         lookup->opcode_info_from_sm4[info->opcode] = info;
-+        lookup->opcode_info_from_vsir[info->handler_idx] = info;
-     }
-     for (i = 0; i < ARRAY_SIZE(register_type_table); ++i)
-@@ -1678,13 +1917,13 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
-         lookup->register_type_info_from_sm4[info->sm4_type] = info;
-         lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info;
-     }
--static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
--    tpf->ctx = ctx;
--    tpf->buffer = buffer;
--    init_sm4_lookup_tables(&tpf->lookup);
-+    for (i = 0; i < ARRAY_SIZE(stat_field_table); ++i)
-+    {
-+        const struct vkd3d_sm4_stat_field_info *info = &stat_field_table[i];
-+        lookup->stat_field_from_sm4[info->opcode] = info;
-+    }
- }
- static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode(
-@@ -1695,6 +1934,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode(
-     return lookup->opcode_info_from_sm4[sm4_opcode];
- }
-+static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode(
-+        const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode)
-+    if (vsir_opcode >= VKD3DSIH_COUNT)
-+        return NULL;
-+    return lookup->opcode_info_from_vsir[vsir_opcode];
-+static unsigned int opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info)
-+    return strnlen(info->dst_info, SM4_MAX_DST_COUNT);
-+static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info)
-+    return strnlen(info->src_info, SM4_MAX_SRC_COUNT);
- static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type(
-         const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type)
- {
-@@ -1721,6 +1978,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type(
-     return register_type_info->default_src_swizzle_type;
- }
-+static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode(
-+        const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode)
-+    const struct vkd3d_sm4_stat_field_info *field_info;
-+    if (sm4_opcode >= VKD3D_SM4_OP_COUNT || !(field_info = lookup->stat_field_from_sm4[sm4_opcode]))
-+        return VKD3D_STAT_UNUSED;
-+    return field_info->field;
- static enum vkd3d_data_type map_data_type(char t)
- {
-     switch (t)
-@@ -1735,12 +2002,8 @@ static enum vkd3d_data_type map_data_type(char t)
-             return VKD3D_DATA_UINT;
-         case 'O':
-             return VKD3D_DATA_OPAQUE;
--        case 'R':
--            return VKD3D_DATA_RESOURCE;
--        case 'S':
--            return VKD3D_DATA_SAMPLER;
--        case 'U':
--            return VKD3D_DATA_UAV;
-+        case '*':
-+            return VKD3D_DATA_UNUSED;
-         default:
-             ERR("Invalid data type '%c'.\n", t);
-             return VKD3D_DATA_FLOAT;
-@@ -1973,7 +2236,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui
-     return true;
- }
--static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg)
-+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg)
- {
-     switch (reg->type)
-     {
-@@ -2411,8 +2674,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str
-     ins->raw = false;
-     ins->structured = false;
-     ins->predicate = NULL;
--    ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT);
--    ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT);
-+    ins->dst_count = opcode_info_get_dst_count(opcode_info);
-+    ins->src_count = opcode_info_get_src_count(opcode_info);
-     ins->src = src_params = vsir_program_get_src_params(program, ins->src_count);
-     if (!src_params && ins->src_count)
-     {
-@@ -2553,7 +2816,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro
-     version.minor = VKD3D_SM4_VERSION_MINOR(version_token);
-     /* Estimate instruction count to avoid reallocation in most shaders. */
--    if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20))
-+    if (!vsir_program_init(program, compile_info,
-+            &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
-         return false;
-     vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name);
-     sm4->ptr = sm4->start;
-@@ -2670,6 +2934,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
-     if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL)
-         uninvert_used_masks(&program->patch_constant_signature);
-+    switch (program->shader_version.type)
-+    {
-+        case VKD3D_SHADER_TYPE_HULL:
-+        case VKD3D_SHADER_TYPE_DOMAIN:
-+            break;
-+        default:
-+            if (program->patch_constant_signature.element_count != 0)
-+            {
-+                WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n");
-+                shader_signature_cleanup(&program->patch_constant_signature);
-+            }
-+            break;
-+    }
-     if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature,
-             sm4.input_register_masks, "Input")
-             || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature,
-@@ -2706,12 +2985,8 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
-             && !sm4.has_control_point_phase && !sm4.p.failed)
-         shader_sm4_validate_default_phase_index_ranges(&sm4);
--    if (!sm4.p.failed)
--        vkd3d_shader_parser_validate(&sm4.p, config_flags);
-     if (sm4.p.failed)
-     {
--        WARN("Failed to parse shader.\n");
-         vsir_program_cleanup(program);
-         return VKD3D_ERROR_INVALID_SHADER;
-     }
-@@ -2719,24 +2994,10 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
-     return VKD3D_OK;
- }
--static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block);
--static bool type_is_integer(const struct hlsl_type *type)
--    switch (type->e.numeric.type)
--    {
--        case HLSL_TYPE_BOOL:
--        case HLSL_TYPE_INT:
--        case HLSL_TYPE_UINT:
--            return true;
--        default:
--            return false;
--    }
-+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block);
--bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
--        bool output, enum vkd3d_shader_register_type *type, bool *has_idx)
-+bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
-+        const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx)
- {
-     unsigned int i;
-@@ -2750,12 +3011,19 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem
-     }
-     register_table[] =
-     {
--        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_THREADID,      false},
--        {"sv_groupid",          false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_THREADGROUPID, false},
--        {"sv_groupthreadid",    false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_LOCALTHREADID, false},
-+        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_THREADID,         false},
-+        {"sv_groupid",          false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_THREADGROUPID,    false},
-+        {"sv_groupindex",       false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_LOCALTHREADINDEX, false},
-+        {"sv_groupthreadid",    false, VKD3D_SHADER_TYPE_COMPUTE,  VKD3DSPR_LOCALTHREADID,    false},
-+        {"sv_domainlocation",   false, VKD3D_SHADER_TYPE_DOMAIN,   VKD3DSPR_TESSCOORD,     false},
-+        {"sv_primitiveid",      false, VKD3D_SHADER_TYPE_DOMAIN,   VKD3DSPR_PRIMID,        false},
-         {"sv_primitiveid",      false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID,        false},
-+        {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID,    false},
-+        {"sv_primitiveid",          false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID,        false},
-         /* Put sv_target in this table, instead of letting it fall through to
-          * default varying allocation, so that the register index matches the
-          * usage index. */
-@@ -2768,9 +3036,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem
-     for (i = 0; i < ARRAY_SIZE(register_table); ++i)
-     {
--        if (!ascii_strcasecmp(semantic->name, register_table[i].semantic)
-+        if (!ascii_strcasecmp(semantic_name, register_table[i].semantic)
-                 && output == register_table[i].output
--                && ctx->profile->type == register_table[i].shader_type)
-+                && version->type == register_table[i].shader_type)
-         {
-             if (type)
-                 *type = register_table[i].type;
-@@ -2782,8 +3050,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem
-     return false;
- }
--bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
--        bool output, D3D_NAME *usage)
-+static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic,
-+        enum vkd3d_tessellator_domain domain, uint32_t index)
-+    switch (domain)
-+    {
-+            if (index == 0)
-+                *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN;
-+            else if (index == 1)
-+                *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET;
-+            else
-+                return false;
-+            return true;
-+            *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE;
-+            return index < 3;
-+            *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE;
-+            return index < 4;
-+        default:
-+            vkd3d_unreachable();
-+    }
-+static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic,
-+        enum vkd3d_tessellator_domain domain, uint32_t index)
-+    switch (domain)
-+    {
-+            return false;
-+            *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT;
-+            return index == 0;
-+            *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT;
-+            return index < 2;
-+        default:
-+            vkd3d_unreachable();
-+    }
-+bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic,
-+        const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain,
-+        const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func)
- {
-     unsigned int i;
-@@ -2792,54 +3109,104 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant
-         const char *name;
-         bool output;
-         enum vkd3d_shader_type shader_type;
--        D3D_NAME usage;
-+        enum vkd3d_shader_sysval_semantic semantic;
-     }
-     semantics[] =
-     {
-         {"sv_dispatchthreadid",         false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
-         {"sv_groupid",                  false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
-+        {"sv_groupindex",               false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
-         {"sv_groupthreadid",            false, VKD3D_SHADER_TYPE_COMPUTE,   ~0u},
--        {"position",                    false, VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_POSITION},
--        {"sv_position",                 false, VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_POSITION},
--        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_PRIMITIVE_ID},
--        {"position",                    true,  VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_POSITION},
--        {"sv_position",                 true,  VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_POSITION},
--        {"sv_primitiveid",              true,  VKD3D_SHADER_TYPE_GEOMETRY,  D3D_NAME_PRIMITIVE_ID},
--        {"position",                    false, VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_POSITION},
--        {"sv_position",                 false, VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_POSITION},
--        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_PRIMITIVE_ID},
--        {"sv_isfrontface",              false, VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_IS_FRONT_FACE},
--        {"sv_rendertargetarrayindex",   false, VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_RENDER_TARGET_ARRAY_INDEX},
--        {"sv_viewportarrayindex",       false, VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_VIEWPORT_ARRAY_INDEX},
--        {"color",                       true,  VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_TARGET},
--        {"depth",                       true,  VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_DEPTH},
--        {"sv_target",                   true,  VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_TARGET},
--        {"sv_depth",                    true,  VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_DEPTH},
--        {"sv_coverage",                 true,  VKD3D_SHADER_TYPE_PIXEL,     D3D_NAME_COVERAGE},
--        {"sv_position",                 false, VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_UNDEFINED},
--        {"sv_vertexid",                 false, VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_VERTEX_ID},
--        {"sv_instanceid",               false, VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_INSTANCE_ID},
--        {"position",                    true,  VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_POSITION},
--        {"sv_position",                 true,  VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_POSITION},
--        {"sv_rendertargetarrayindex",   true,  VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_RENDER_TARGET_ARRAY_INDEX},
--        {"sv_viewportarrayindex",       true,  VKD3D_SHADER_TYPE_VERTEX,    D3D_NAME_VIEWPORT_ARRAY_INDEX},
-+        {"sv_domainlocation",           false, VKD3D_SHADER_TYPE_DOMAIN,    ~0u},
-+        {"sv_position",                 false, VKD3D_SHADER_TYPE_DOMAIN,    VKD3D_SHADER_SV_NONE},
-+        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_DOMAIN,    ~0u},
-+        {"sv_position",                 true,  VKD3D_SHADER_TYPE_DOMAIN,    VKD3D_SHADER_SV_POSITION},
-+        {"position",                    false, VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SHADER_SV_POSITION},
-+        {"sv_position",                 false, VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SHADER_SV_POSITION},
-+        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SHADER_SV_PRIMITIVE_ID},
-+        {"position",                    true,  VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SHADER_SV_POSITION},
-+        {"sv_position",                 true,  VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SHADER_SV_POSITION},
-+        {"sv_primitiveid",              true,  VKD3D_SHADER_TYPE_GEOMETRY,  VKD3D_SHADER_SV_PRIMITIVE_ID},
-+        {"sv_outputcontrolpointid",     false, VKD3D_SHADER_TYPE_HULL,      ~0u},
-+        {"sv_position",                 false, VKD3D_SHADER_TYPE_HULL,      ~0u},
-+        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_HULL,      ~0u},
-+        {"sv_position",                 true,  VKD3D_SHADER_TYPE_HULL,      VKD3D_SHADER_SV_POSITION},
-+        {"position",                    false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_POSITION},
-+        {"sv_position",                 false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_POSITION},
-+        {"sv_primitiveid",              false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_PRIMITIVE_ID},
-+        {"sv_isfrontface",              false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_IS_FRONT_FACE},
-+        {"sv_rendertargetarrayindex",   false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX},
-+        {"sv_viewportarrayindex",       false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX},
-+        {"sv_sampleindex",              false, VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_SAMPLE_INDEX},
-+        {"color",                       true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_TARGET},
-+        {"depth",                       true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_DEPTH},
-+        {"sv_target",                   true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_TARGET},
-+        {"sv_depth",                    true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_DEPTH},
-+        {"sv_coverage",                 true,  VKD3D_SHADER_TYPE_PIXEL,     VKD3D_SHADER_SV_COVERAGE},
-+        {"sv_position",                 false, VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_NONE},
-+        {"sv_vertexid",                 false, VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_VERTEX_ID},
-+        {"sv_instanceid",               false, VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_INSTANCE_ID},
-+        {"position",                    true,  VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_POSITION},
-+        {"sv_position",                 true,  VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_POSITION},
-+        {"sv_rendertargetarrayindex",   true,  VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX},
-+        {"sv_viewportarrayindex",       true,  VKD3D_SHADER_TYPE_VERTEX,    VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX},
-     };
--    bool needs_compat_mapping = ascii_strncasecmp(semantic->name, "sv_", 3);
-+    bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3);
-+    if (is_patch_constant_func)
-+    {
-+        if (output)
-+        {
-+            if (!ascii_strcasecmp(semantic_name, "sv_tessfactor"))
-+                return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx);
-+            if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor"))
-+                return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx);
-+            if (!ascii_strcasecmp(semantic_name, "sv_position"))
-+            {
-+                *sysval_semantic = VKD3D_SHADER_SV_NONE;
-+                return true;
-+            }
-+        }
-+        else
-+        {
-+            if (!ascii_strcasecmp(semantic_name, "sv_primitiveid")
-+                    || !ascii_strcasecmp(semantic_name, "sv_position"))
-+            {
-+                *sysval_semantic = ~0u;
-+                return true;
-+            }
-+            return false;
-+        }
-+    }
-+    else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
-+    {
-+        if (!output)
-+        {
-+            if (!ascii_strcasecmp(semantic_name, "sv_tessfactor"))
-+                return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx);
-+            if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor"))
-+                return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx);
-+        }
-+    }
-     for (i = 0; i < ARRAY_SIZE(semantics); ++i)
-     {
--        if (!ascii_strcasecmp(semantic->name, semantics[i].name)
-+        if (!ascii_strcasecmp(semantic_name, semantics[i].name)
-                 && output == semantics[i].output
--                && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping)
--                && ctx->profile->type == semantics[i].shader_type)
-+                && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping)
-+                && version->type == semantics[i].shader_type)
-         {
--            *usage = semantics[i].usage;
-+            *sysval_semantic = semantics[i].semantic;
-             return true;
-         }
-     }
-@@ -2847,7 +3214,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant
-     if (!needs_compat_mapping)
-         return false;
--    *usage = D3D_NAME_UNDEFINED;
-+    *sysval_semantic = VKD3D_SHADER_SV_NONE;
-     return true;
- }
-@@ -2865,110 +3232,66 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc,
-         ctx->result = buffer->status;
- }
--static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output)
-+static int signature_element_pointer_compare(const void *x, const void *y)
-+    const struct signature_element *e = *(const struct signature_element **)x;
-+    const struct signature_element *f = *(const struct signature_element **)y;
-+    int ret;
-+    if ((ret = vkd3d_u32_compare(e->register_index, f->register_index)))
-+        return ret;
-+    return vkd3d_u32_compare(e->mask, f->mask);
-+static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag)
- {
-+    bool output = tag == TAG_OSGN || (tag == TAG_PCSG
-+            && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL);
-+    const struct signature_element **sorted_elements;
-     struct vkd3d_bytecode_buffer buffer = {0};
--    struct vkd3d_string_buffer *string;
--    const struct hlsl_ir_var *var;
--    size_t count_position;
-     unsigned int i;
--    bool ret;
--    count_position = put_u32(&buffer, 0);
-+    put_u32(&buffer, signature->element_count);
-     put_u32(&buffer, 8); /* unknown */
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
--    {
--        unsigned int width = (1u << var->data_type->dimx) - 1, use_mask;
--        uint32_t usage_idx, reg_idx;
--        D3D_NAME usage;
--        bool has_idx;
--        if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic))
--            continue;
--        ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
--        VKD3D_ASSERT(ret);
--        if (usage == ~0u)
--            continue;
--        usage_idx = var->semantic.index;
-+    if (!(sorted_elements = vkd3d_calloc(signature->element_count, sizeof(*sorted_elements))))
-+        return;
-+    for (i = 0; i < signature->element_count; ++i)
-+        sorted_elements[i] = &signature->elements[i];
-+    qsort(sorted_elements, signature->element_count, sizeof(*sorted_elements), signature_element_pointer_compare);
--        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx))
--        {
--            reg_idx = has_idx ? var->semantic.index : ~0u;
--        }
--        else
--        {
--            VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
--            reg_idx = var->regs[HLSL_REGSET_NUMERIC].id;
--        }
-+    for (i = 0; i < signature->element_count; ++i)
-+    {
-+        const struct signature_element *element = sorted_elements[i];
-+        enum vkd3d_shader_sysval_semantic sysval;
-+        uint32_t used_mask = element->used_mask;
--        use_mask = width; /* FIXME: accurately report use mask */
-         if (output)
--            use_mask = 0xf ^ use_mask;
-+            used_mask = 0xf ^ used_mask;
--        /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */
--        if (usage >= 64)
--            usage = 0;
-+        sysval = element->sysval_semantic;
-+        if (sysval >= VKD3D_SHADER_SV_TARGET)
-+            sysval = VKD3D_SHADER_SV_NONE;
-         put_u32(&buffer, 0); /* name */
--        put_u32(&buffer, usage_idx);
--        put_u32(&buffer, usage);
--        switch (var->data_type->e.numeric.type)
--        {
--            case HLSL_TYPE_FLOAT:
--            case HLSL_TYPE_HALF:
--                put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32);
--                break;
--            case HLSL_TYPE_INT:
--                put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32);
--                break;
--            case HLSL_TYPE_BOOL:
--            case HLSL_TYPE_UINT:
--                put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32);
--                break;
--            default:
--                if ((string = hlsl_type_to_string(ctx, var->data_type)))
--                    hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
--                            "Invalid data type %s for semantic variable %s.", string->buffer, var->name);
--                hlsl_release_string_buffer(ctx, string);
--                put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN);
--        }
--        put_u32(&buffer, reg_idx);
--        put_u32(&buffer, vkd3d_make_u16(width, use_mask));
-+        put_u32(&buffer, element->semantic_index);
-+        put_u32(&buffer, sysval);
-+        put_u32(&buffer, element->component_type);
-+        put_u32(&buffer, element->register_index);
-+        put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask));
-     }
--    i = 0;
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
-+    for (i = 0; i < signature->element_count; ++i)
-     {
--        const char *semantic = var->semantic.name;
-+        const struct signature_element *element = sorted_elements[i];
-         size_t string_offset;
--        D3D_NAME usage;
--        if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic))
--            continue;
--        hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
--        if (usage == ~0u)
--            continue;
--        if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color"))
--            string_offset = put_string(&buffer, "SV_Target");
--        else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth"))
--            string_offset = put_string(&buffer, "SV_Depth");
--        else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position"))
--            string_offset = put_string(&buffer, "SV_Position");
--        else
--            string_offset = put_string(&buffer, semantic);
--        set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset);
-+        string_offset = put_string(&buffer, element->semantic_name);
-+        set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset);
-     }
--    set_u32(&buffer, count_position, i);
--    add_section(ctx, dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer);
-+    add_section(tpf->ctx, &tpf->dxbc, tag, &buffer);
-+    vkd3d_free(sorted_elements);
- }
- static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
-@@ -2990,6 +3313,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
-+        case HLSL_CLASS_ERROR:
-         case HLSL_CLASS_STRUCT:
-         case HLSL_CLASS_PASS:
-@@ -3008,6 +3332,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
-         case HLSL_CLASS_HULL_SHADER:
-         case HLSL_CLASS_BLEND_STATE:
-         case HLSL_CLASS_NULL:
-             break;
-     }
-@@ -3123,24 +3448,30 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
-     vkd3d_unreachable();
- }
--static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type)
-+static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type)
- {
--    switch (type->e.resource.format->e.numeric.type)
-+    const struct hlsl_type *format = type->e.resource.format;
-+    switch (format->e.numeric.type)
-     {
-         case HLSL_TYPE_DOUBLE:
--            return D3D_RETURN_TYPE_DOUBLE;
-+            return VKD3D_SM4_DATA_DOUBLE;
-         case HLSL_TYPE_FLOAT:
-         case HLSL_TYPE_HALF:
--            return D3D_RETURN_TYPE_FLOAT;
-+            if (format->modifiers & HLSL_MODIFIER_UNORM)
-+                return VKD3D_SM4_DATA_UNORM;
-+            if (format->modifiers & HLSL_MODIFIER_SNORM)
-+                return VKD3D_SM4_DATA_SNORM;
-+            return VKD3D_SM4_DATA_FLOAT;
-         case HLSL_TYPE_INT:
--            return D3D_RETURN_TYPE_SINT;
-+            return VKD3D_SM4_DATA_INT;
-             break;
-         case HLSL_TYPE_BOOL:
-         case HLSL_TYPE_UINT:
--            return D3D_RETURN_TYPE_UINT;
-+            return VKD3D_SM4_DATA_UINT;
-         default:
-             vkd3d_unreachable();
-@@ -3170,6 +3501,7 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ
-             return D3D_SRV_DIMENSION_BUFFER;
-         default:
-@@ -3398,6 +3730,48 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un
-     return extern_resources;
- }
-+/* For some reason, for matrices, values from default value initializers end up in different
-+ * components than from regular initializers. Default value initializers fill the matrix in
-+ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order
-+ * (top-to-bottom left-to-right), so they have to be adjusted.
-+ * An exception is that the order of matrix initializers for function parameters are row-major
-+ * (top-to-bottom left-to-right). */
-+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index)
-+    unsigned int element_comp_count, element, x, y, i;
-+    unsigned int base = 0;
-+    switch (type->class)
-+    {
-+        case HLSL_CLASS_MATRIX:
-+            x = index / type->dimy;
-+            y = index % type->dimy;
-+            return y * type->dimx + x;
-+        case HLSL_CLASS_ARRAY:
-+            element_comp_count = hlsl_type_component_count(type->e.array.type);
-+            element = index / element_comp_count;
-+            base = element * element_comp_count;
-+            return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base);
-+        case HLSL_CLASS_STRUCT:
-+            for (i = 0; i < type->e.record.field_count; ++i)
-+            {
-+                struct hlsl_type *field_type = type->e.record.fields[i].type;
-+                element_comp_count = hlsl_type_component_count(field_type);
-+                if (index - base < element_comp_count)
-+                    return base + get_component_index_from_default_initializer_index(field_type, index - base);
-+                base += element_comp_count;
-+            }
-+            break;
-+        default:
-+            return index;
-+    }
-+    vkd3d_unreachable();
- static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
- {
-     uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t);
-@@ -3471,7 +3845,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-         {
-             unsigned int dimx = resource->component_type->e.resource.format->dimx;
--            put_u32(&buffer, sm4_resource_format(resource->component_type));
-+            put_u32(&buffer, sm4_data_type(resource->component_type));
-             put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type));
-             put_u32(&buffer, ~0u); /* FIXME: multisample count */
-             flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT;
-@@ -3552,7 +3926,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-             {
-                 uint32_t flags = 0;
--                if (var->last_read)
-+                if (var->is_read)
-                     flags |= D3D_SVF_USED;
-                 put_u32(&buffer, 0); /* name */
-@@ -3598,7 +3972,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-                     for (k = 0; k < comp_count; ++k)
-                     {
-                         struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
--                        unsigned int comp_offset;
-+                        unsigned int comp_offset, comp_index;
-                         enum hlsl_regset regset;
-                         if (comp_type->class == HLSL_CLASS_STRING)
-@@ -3608,7 +3982,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-                             continue;
-                         }
--                        comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
-+                        comp_index = get_component_index_from_default_initializer_index(var->data_type, k);
-+                        comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, &regset);
-                         if (regset == HLSL_REGSET_NUMERIC)
-                         {
-                             if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE)
-@@ -3655,6 +4030,7 @@ static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_typ
-             return VKD3D_SM4_RESOURCE_BUFFER;
-         default:
-@@ -3779,11 +4155,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s
-         *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
- }
--static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg,
-+static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg,
-         uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr)
- {
--    const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref);
-+    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-+    const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref);
-     const struct hlsl_ir_var *var = deref->var;
-+    struct hlsl_ctx *ctx = tpf->ctx;
-     if (var->is_uniform)
-     {
-@@ -3793,7 +4171,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-         {
-             reg->type = VKD3DSPR_RESOURCE;
-             reg->dimension = VSIR_DIMENSION_VEC4;
--            if (hlsl_version_ge(ctx, 5, 1))
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-             {
-                 reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
-                 reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
-@@ -3812,7 +4190,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-         {
-             reg->type = VKD3DSPR_UAV;
-             reg->dimension = VSIR_DIMENSION_VEC4;
--            if (hlsl_version_ge(ctx, 5, 1))
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-             {
-                 reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
-                 reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
-@@ -3831,7 +4209,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-         {
-             reg->type = VKD3DSPR_SAMPLER;
-             reg->dimension = VSIR_DIMENSION_NONE;
--            if (hlsl_version_ge(ctx, 5, 1))
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-             {
-                 reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
-                 reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
-@@ -3853,7 +4231,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-             VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
-             reg->type = VKD3DSPR_CONSTBUFFER;
-             reg->dimension = VSIR_DIMENSION_VEC4;
--            if (hlsl_version_ge(ctx, 5, 1))
-+            if (vkd3d_shader_ver_ge(version, 5, 1))
-             {
-                 reg->idx[0].offset = var->buffer->reg.id;
-                 reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
-@@ -3873,7 +4251,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-     {
-         bool has_idx;
--        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, &has_idx))
-+        if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx))
-         {
-             unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-@@ -3883,7 +4261,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-                 reg->idx_count = 1;
-             }
--            reg->dimension = VSIR_DIMENSION_VEC4;
-+            if (shader_sm4_is_scalar_register(reg))
-+                reg->dimension = VSIR_DIMENSION_SCALAR;
-+            else
-+                reg->dimension = VSIR_DIMENSION_VEC4;
-             *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
-         }
-         else
-@@ -3891,7 +4272,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-             struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
-             VKD3D_ASSERT(hlsl_reg.allocated);
--            reg->type = VKD3DSPR_INPUT;
-+            if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
-+                reg->type = VKD3DSPR_PATCHCONST;
-+            else
-+                reg->type = VKD3DSPR_INPUT;
-             reg->dimension = VSIR_DIMENSION_VEC4;
-             reg->idx[0].offset = hlsl_reg.id;
-             reg->idx_count = 1;
-@@ -3902,7 +4287,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-     {
-         bool has_idx;
--        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, &has_idx))
-+        if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx))
-         {
-             unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
-@@ -3912,7 +4297,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-                 reg->idx_count = 1;
-             }
--            if (reg->type == VKD3DSPR_DEPTHOUT)
-+            if (shader_sm4_is_scalar_register(reg))
-                 reg->dimension = VSIR_DIMENSION_SCALAR;
-             else
-                 reg->dimension = VSIR_DIMENSION_VEC4;
-@@ -3938,13 +4323,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
-     }
- }
--static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src,
-+static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src,
-         const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr)
- {
-     unsigned int hlsl_swizzle;
-     uint32_t writemask;
--    sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr);
-+    sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr);
-     if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4)
-     {
-         hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
-@@ -3982,7 +4367,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src,
-     }
- }
--static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src,
-+static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src,
-         const struct hlsl_ir_node *instr, uint32_t map_writemask)
- {
-     unsigned int hlsl_swizzle;
-@@ -4018,7 +4403,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_
-     return 0;
- }
--static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg,
-+static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg,
-         enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle)
- {
-     const struct vkd3d_sm4_register_type_info *register_type_info;
-@@ -4078,7 +4463,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v
-     return token;
- }
--static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg,
-+static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg,
-         unsigned int j)
- {
-     unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j);
-@@ -4108,7 +4493,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct
-     }
- }
--static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst)
-+static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst)
- {
-     struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-     uint32_t token = 0;
-@@ -4121,7 +4506,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk
-         sm4_write_register_index(tpf, &dst->reg, j);
- }
--static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src)
-+static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src)
- {
-     struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-     uint32_t token = 0, mod_token = 0;
-@@ -4182,10 +4567,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk
-     }
- }
--static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr)
-+static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr)
-+    enum vkd3d_shader_type shader_type = tpf->program->shader_version.type;
-+    enum vkd3d_sm4_stat_field stat_field;
-+    uint32_t opcode;
-+    ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT];
-+    opcode = instr->opcode & VKD3D_SM4_OPCODE_MASK;
-+    stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, opcode);
-+    switch (opcode)
-+    {
-+        case VKD3D_SM4_OP_DCL_TEMPS:
-+            tpf->stat->fields[stat_field] = max(tpf->stat->fields[stat_field], instr->idx[0]);
-+            break;
-+            tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM4_PRIMITIVE_TYPE_MASK)
-+                    >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT;
-+            break;
-+        case VKD3D_SM4_OP_DCL_VERTICES_OUT:
-+        case VKD3D_SM5_OP_DCL_GS_INSTANCES:
-+            tpf->stat->fields[stat_field] = instr->idx[0];
-+            break;
-+            tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT;
-+            break;
-+            if ((shader_type == VKD3D_SHADER_TYPE_HULL && opcode == VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT)
-+                    || (shader_type == VKD3D_SHADER_TYPE_DOMAIN
-+                            && opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT))
-+            {
-+                tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_CONTROL_POINT_COUNT_MASK)
-+                        >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT;
-+            }
-+            break;
-+        default:
-+            ++tpf->stat->fields[stat_field];
-+    }
-+static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr)
- {
--    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-     uint32_t token = instr->opcode | instr->extra_bits;
-+    struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
-     unsigned int size, i, j;
-     size_t token_position;
-@@ -4218,6 +4648,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4
-     size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t);
-     token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);
-     set_u32(buffer, token_position, token);
-+    sm4_update_stat_counters(tpf, instr);
- }
- static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
-@@ -4247,7 +4679,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
-     return true;
- }
--static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer)
-+static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer)
- {
-     size_t size = (cbuffer->used_size + 3) / 4;
-@@ -4282,7 +4714,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource)
-+static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource)
- {
-     unsigned int i;
-     struct sm4_instruction instr =
-@@ -4323,9 +4755,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex
-     }
- }
--static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource,
-+static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource,
-         bool uav)
- {
-+    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-     enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES;
-     struct hlsl_type *component_type;
-     struct sm4_instruction instr;
-@@ -4348,21 +4781,21 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex
-             .dsts[0].reg.idx_count = 1,
-             .dst_count = 1,
--            .idx[0] = sm4_resource_format(component_type) * 0x1111,
-+            .idx[0] = sm4_data_type(component_type) * 0x1111,
-             .idx_count = 1,
-         };
-         multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
-                 || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY;
--        if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count)
-+        if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count)
-         {
-             hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
--                    "Multisampled texture object declaration needs sample count for profile %s.",
--                    tpf->ctx->profile->name);
-+                    "Multisampled texture object declaration needs sample count for profile %u.%u.",
-+                    version->major, version->minor);
-         }
--        if (hlsl_version_ge(tpf->ctx, 5, 1))
-+        if (vkd3d_shader_ver_ge(version, 5, 1))
-         {
-             VKD3D_ASSERT(!i);
-             instr.dsts[0].reg.idx[0].offset = resource->id;
-@@ -4387,6 +4820,9 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex
-                     instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED;
-                     instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4;
-                     break;
-+                case HLSL_SAMPLER_DIM_RAW_BUFFER:
-+                    instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW;
-+                    break;
-                 default:
-                     instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED;
-                     break;
-@@ -4397,7 +4833,15 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex
-         }
-         else
-         {
--            instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
-+            switch (component_type->sampler_dim)
-+            {
-+                case HLSL_SAMPLER_DIM_RAW_BUFFER:
-+                    instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW;
-+                    break;
-+                default:
-+                    instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
-+                    break;
-+            }
-         }
-         instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
-@@ -4408,305 +4852,189 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex
-     }
- }
--static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var)
-+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
- {
--    const struct hlsl_profile_info *profile = tpf->ctx->profile;
--    const bool output = var->is_output_semantic;
--    D3D_NAME usage;
--    bool has_idx;
-     struct sm4_instruction instr =
-     {
--        .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4,
--        .dst_count = 1,
-+        .opcode = VKD3D_SM4_OP_DCL_TEMPS,
-+        .idx = {count},
-+        .idx_count = 1,
-     };
--    if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx))
--    {
--        if (has_idx)
--        {
--            instr.dsts[0].reg.idx[0].offset = var->semantic.index;
--            instr.dsts[0].reg.idx_count = 1;
--        }
--        else
--        {
--            instr.dsts[0].reg.idx_count = 0;
--        }
--        instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1;
--    }
--    else
-+    write_sm4_instruction(tpf, &instr);
-+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp)
-+    struct sm4_instruction instr =
-     {
--        instr.dsts[0].reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
--        instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
--        instr.dsts[0].reg.idx_count = 1;
--        instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
--    }
-+        .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
--    if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT)
--        instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR;
-+        .idx = {temp->register_idx, temp->register_size, temp->component_count},
-+        .idx_count = 3,
-+    };
--    hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage);
--    if (usage == ~0u)
--        usage = D3D_NAME_UNDEFINED;
-+    write_sm4_instruction(tpf, &instr);
--    if (var->is_input_semantic)
-+static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
-+        const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags)
-+    struct sm4_instruction instr =
-     {
--        switch (usage)
--        {
--            case D3D_NAME_UNDEFINED:
--                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
--                        ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT;
--                break;
-+        .opcode = opcode,
--            case D3D_NAME_INSTANCE_ID:
--            case D3D_NAME_PRIMITIVE_ID:
--            case D3D_NAME_VERTEX_ID:
--                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
--                        ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV;
--                break;
-+        .dsts[0] = *dst,
-+        .dst_count = 1,
--            default:
--                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
--                        ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV;
--                break;
--        }
--        if (profile->type == VKD3D_SHADER_TYPE_PIXEL)
--        {
--            enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR;
--            if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type))
--            {
--                mode = VKD3DSIM_CONSTANT;
--            }
--            else
--            {
--                static const struct
--                {
--                    unsigned int modifiers;
--                    enum vkd3d_shader_interpolation_mode mode;
--                }
--                modes[] =
--                {
--                };
--                unsigned int i;
--                for (i = 0; i < ARRAY_SIZE(modes); ++i)
--                {
--                    if ((var->storage_modifiers & modes[i].modifiers) == modes[i].modifiers)
--                    {
--                        mode = modes[i].mode;
--                        break;
--                    }
--                }
--            }
--            instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
--        }
--    }
--    else
--    {
--        if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL)
--            instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT;
--        else
--            instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV;
--    }
--    switch (usage)
--    {
--        case D3D_NAME_COVERAGE:
--        case D3D_NAME_DEPTH:
--        case D3D_NAME_DEPTH_LESS_EQUAL:
--        case D3D_NAME_TARGET:
--        case D3D_NAME_UNDEFINED:
--            break;
--        default:
--            instr.idx_count = 1;
--            instr.idx[0] = usage;
--            break;
--    }
-+        .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count)
-+static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
-+        const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags)
- {
-     struct sm4_instruction instr =
-     {
--        .opcode = VKD3D_SM4_OP_DCL_TEMPS,
-+        .opcode = opcode,
--        .idx = {temp_count},
-+        .dsts[0] = semantic->reg,
-+        .dst_count = 1,
-+        .idx[0] = semantic->sysval_semantic,
-         .idx_count = 1,
-+        .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
-     };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx,
--        uint32_t size, uint32_t comp_count)
-+static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size)
- {
-     struct sm4_instruction instr =
-     {
--        .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
-+        .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
--        .idx = {idx, size, comp_count},
-+        .idx = {group_size->x, group_size->y, group_size->z},
-         .idx_count = 3,
-     };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3])
-+static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags)
- {
-     struct sm4_instruction instr =
-     {
--        .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
--        .idx[0] = thread_count[0],
--        .idx[1] = thread_count[1],
--        .idx[2] = thread_count[2],
--        .idx_count = 3,
-+        .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,
-+        .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT,
-     };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_ret(const struct tpf_writer *tpf)
-+static void tpf_write_hs_decls(const struct tpf_compiler *tpf)
- {
-     struct sm4_instruction instr =
-     {
--        .opcode = VKD3D_SM4_OP_RET,
-+        .opcode = VKD3D_SM5_OP_HS_DECLS,
-     };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode,
--        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod)
-+static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf)
- {
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = opcode;
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask);
--    instr.srcs[0].modifiers = src_mod;
--    instr.src_count = 1;
-+    struct sm4_instruction instr =
-+    {
-+        .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode,
--        const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src)
-+static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf)
- {
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = opcode;
--    VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts));
--    sm4_dst_from_node(&instr.dsts[dst_idx], dst);
--    instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL;
--    instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE;
--    instr.dsts[1 - dst_idx].reg.idx_count = 0;
--    instr.dst_count = 2;
--    sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask);
--    instr.src_count = 1;
-+    struct sm4_instruction instr =
-+    {
-+        .opcode = VKD3D_SM5_OP_HS_FORK_PHASE,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode,
--        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
-+static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count)
- {
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = opcode;
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask);
--    sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask);
--    instr.src_count = 2;
-+    struct sm4_instruction instr =
-+    {
-+        .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--/* dp# instructions don't map the swizzle. */
--static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode,
--        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
-+static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count)
- {
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = opcode;
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL);
--    sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL);
--    instr.src_count = 2;
-+    struct sm4_instruction instr =
-+    {
-+        .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf,
--        enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx,
--        const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
-+static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain)
- {
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = opcode;
-+    struct sm4_instruction instr =
-+    {
-+        .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN,
-+        .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT,
-+    };
--    VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts));
--    sm4_dst_from_node(&instr.dsts[dst_idx], dst);
--    instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL;
--    instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE;
--    instr.dsts[1 - dst_idx].reg.idx_count = 0;
--    instr.dst_count = 2;
-+    write_sm4_instruction(tpf, &instr);
--    sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask);
--    sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[dst_idx].write_mask);
--    instr.src_count = 2;
-+static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf,
-+        enum vkd3d_shader_tessellator_partitioning partitioning)
-+    struct sm4_instruction instr =
-+    {
-+        .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode,
--        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2,
--        const struct hlsl_ir_node *src3)
-+static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf,
-+        enum vkd3d_shader_tessellator_output_primitive output_primitive)
- {
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = opcode;
-+    struct sm4_instruction instr =
-+    {
-+        .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT,
-+    };
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
-+    write_sm4_instruction(tpf, &instr);
--    sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask);
--    sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask);
--    sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask);
--    instr.src_count = 3;
-+static void write_sm4_ret(const struct tpf_compiler *tpf)
-+    struct sm4_instruction instr =
-+    {
-+        .opcode = VKD3D_SM4_OP_RET,
-+    };
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst,
-+static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
-         const struct hlsl_deref *resource, const struct hlsl_ir_node *coords,
-         const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset,
-         enum hlsl_sampler_dim dim)
-@@ -4715,12 +5043,16 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node
-     bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE
-             && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
-     bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS);
-+    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-+    bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER;
-     unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL;
-     struct sm4_instruction instr;
-     memset(&instr, 0, sizeof(instr));
-     if (uav)
-         instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED;
-+    else if (raw)
-+        instr.opcode = VKD3D_SM5_OP_LD_RAW;
-     else
-         instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD;
-@@ -4769,7 +5101,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node
-             reg->dimension = VSIR_DIMENSION_SCALAR;
-             reg->u.immconst_u32[0] = index->value.u[0].u;
-         }
--        else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0)
-+        else if (version->major == 4 && version->minor == 0)
-         {
-             hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index.");
-         }
-@@ -4784,7 +5116,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load)
-+static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
- {
-     const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
-     const struct hlsl_ir_node *coords = load->coords.node;
-@@ -4821,775 +5153,107 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_
-             break;
-         default:
--            vkd3d_unreachable();
--    }
--    if (texel_offset)
--    {
--        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
--        {
--            hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
--                    "Offset must resolve to integer literal in the range -8 to 7.");
--            return;
--        }
--    }
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
--    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
--    sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr);
--    instr.src_count = 3;
--    if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD
--           || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS)
--    {
--        sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL);
--        ++instr.src_count;
--    }
--    else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
--    {
--        sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL);
--        sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL);
--        instr.src_count += 2;
--    }
--    else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP
--            || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ)
--    {
--        sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL);
--        ++instr.src_count;
--    }
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load)
--    const struct hlsl_deref *resource = &load->resource;
--    const struct hlsl_ir_node *dst = &load->node;
--    struct sm4_instruction instr;
--    VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO;
--    if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr);
--    instr.src_count = 1;
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load)
--    const struct hlsl_deref *resource = &load->resource;
--    const struct hlsl_ir_node *dst = &load->node;
--    struct sm4_instruction instr;
--    if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER
--            || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
--    {
--        hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers.");
--        return;
--    }
--    VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM4_OP_RESINFO;
--    if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
--        instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL);
--    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
--    instr.src_count = 2;
--    write_sm4_instruction(tpf, &instr);
--static bool type_is_float(const struct hlsl_type *type)
--    return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF;
--static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr,
--        const struct hlsl_ir_node *arg, uint32_t mask)
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM4_OP_AND;
--    sm4_dst_from_node(&instr.dsts[0], &expr->node);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask);
--    instr.srcs[1].reg.type = VKD3DSPR_IMMCONST;
--    instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR;
--    instr.srcs[1].reg.u.immconst_u32[0] = mask;
--    instr.src_count = 2;
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr)
--    static const union
--    {
--        uint32_t u;
--        float f;
--    } one = { .f = 1.0 };
--    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
--    const struct hlsl_type *dst_type = expr->node.data_type;
--    const struct hlsl_type *src_type = arg1->data_type;
--    /* Narrowing casts were already lowered. */
--    VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
--    switch (dst_type->e.numeric.type)
--    {
--        case HLSL_TYPE_HALF:
--        case HLSL_TYPE_FLOAT:
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_HALF:
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_INT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_UINT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_BOOL:
--                    write_sm4_cast_from_bool(tpf, expr, arg1, one.u);
--                    break;
--                case HLSL_TYPE_DOUBLE:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float.");
--                    break;
--                default:
--                    vkd3d_unreachable();
--            }
--            break;
--        case HLSL_TYPE_INT:
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_HALF:
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_BOOL:
--                    write_sm4_cast_from_bool(tpf, expr, arg1, 1);
--                    break;
--                case HLSL_TYPE_DOUBLE:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int.");
--                    break;
--                default:
--                    vkd3d_unreachable();
--            }
--            break;
--        case HLSL_TYPE_UINT:
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_HALF:
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
--                    break;
--                case HLSL_TYPE_BOOL:
--                    write_sm4_cast_from_bool(tpf, expr, arg1, 1);
--                    break;
--                case HLSL_TYPE_DOUBLE:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint.");
--                    break;
--                default:
--                    vkd3d_unreachable();
--            }
--            break;
--        case HLSL_TYPE_DOUBLE:
--            hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double.");
--            break;
--        case HLSL_TYPE_BOOL:
--            /* Casts to bool should have already been lowered. */
--        default:
--            vkd3d_unreachable();
--    }
--static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst,
--        const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value)
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
--    sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
--    sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL);
--    instr.src_count = 2;
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst)
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO;
--    sm4_dst_from_node(&instr.dsts[0], dst);
--    instr.dst_count = 1;
--    instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER;
--    instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4;
--    instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
--    instr.src_count = 1;
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr)
--    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
--    const struct hlsl_ir_node *arg2 = expr->operands[1].node;
--    const struct hlsl_ir_node *arg3 = expr->operands[2].node;
--    const struct hlsl_type *dst_type = expr->node.data_type;
--    struct vkd3d_string_buffer *dst_type_string;
--    VKD3D_ASSERT(expr->node.reg.allocated);
--    if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type)))
--        return;
--    switch (expr->op)
--    {
--            if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1))
--                write_sm4_rasterizer_sample_count(tpf, &expr->node);
--            else
--                hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
--                        "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher.");
--            break;
--        case HLSL_OP1_ABS:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer);
--            }
--            break;
--        case HLSL_OP1_BIT_NOT:
--            VKD3D_ASSERT(type_is_integer(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_CAST:
--            write_sm4_cast(tpf, expr);
--            break;
--        case HLSL_OP1_CEIL:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_COS:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1);
--            break;
--        case HLSL_OP1_DSX:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_DSX_COARSE:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_DSX_FINE:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_DSY:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_DSY_COARSE:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_DSY_FINE:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_EXP2:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_F16TOF32:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0));
--            write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_FLOOR:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_FRACT:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_LOG2:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_LOGIC_NOT:
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_NEG:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG);
--                    break;
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer);
--            }
--            break;
--        case HLSL_OP1_RCP:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    /* SM5 comes with a RCP opcode */
--                    if (tpf->ctx->profile->major_version >= 5)
--                    {
--                        write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0);
--                    }
--                    else
--                    {
--                        /* For SM4, implement as DIV dst, 1.0, src */
--                        struct sm4_instruction instr;
--                        struct hlsl_constant_value one;
--                        VKD3D_ASSERT(type_is_float(dst_type));
--                        memset(&instr, 0, sizeof(instr));
--                        instr.opcode = VKD3D_SM4_OP_DIV;
--                        sm4_dst_from_node(&instr.dsts[0], &expr->node);
--                        instr.dst_count = 1;
--                        for (unsigned int i = 0; i < 4; i++)
--                            one.u[i].f = 1.0f;
--                        sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask);
--                        sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask);
--                        instr.src_count = 2;
--                        write_sm4_instruction(tpf, &instr);
--                    }
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer);
--            }
--            break;
--        case HLSL_OP1_REINTERPRET:
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_ROUND:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_RSQ:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_SAT:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV
--                    &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_SIN:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1);
--            break;
--        case HLSL_OP1_SQRT:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP1_TRUNC:
--            VKD3D_ASSERT(type_is_float(dst_type));
--            write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0);
--            break;
--        case HLSL_OP2_ADD:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
--            }
--            break;
--        case HLSL_OP2_BIT_AND:
--            VKD3D_ASSERT(type_is_integer(dst_type));
--            write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
--            break;
--        case HLSL_OP2_BIT_OR:
--            VKD3D_ASSERT(type_is_integer(dst_type));
--            write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
--            break;
--        case HLSL_OP2_BIT_XOR:
--            VKD3D_ASSERT(type_is_integer(dst_type));
--            write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
--            break;
--        case HLSL_OP2_DIV:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
--            }
--            break;
--        case HLSL_OP2_DOT:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    switch (arg1->data_type->dimx)
--                    {
--                        case 4:
--                            write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
--                            break;
--                        case 3:
--                            write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
--                            break;
--                        case 2:
--                            write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
--                            break;
--                        case 1:
--                        default:
--                            vkd3d_unreachable();
--                    }
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
--            }
--            break;
--        case HLSL_OP2_EQUAL:
--        {
--            const struct hlsl_type *src_type = arg1->data_type;
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_BOOL:
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
--                            debug_hlsl_type(tpf->ctx, src_type));
--                    break;
--            }
--            break;
--        }
--        case HLSL_OP2_GEQUAL:
--        {
--            const struct hlsl_type *src_type = arg1->data_type;
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_INT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_BOOL:
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
--                            debug_hlsl_type(tpf->ctx, src_type));
--                    break;
--            }
--            break;
--        }
--        case HLSL_OP2_LESS:
--        {
--            const struct hlsl_type *src_type = arg1->data_type;
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_INT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_BOOL:
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
--                            debug_hlsl_type(tpf->ctx, src_type));
--                    break;
--            }
--            break;
--        }
--        case HLSL_OP2_LOGIC_AND:
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
--            write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
--            break;
--        case HLSL_OP2_LOGIC_OR:
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
--            write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
--            break;
--        case HLSL_OP2_LSHIFT:
--            VKD3D_ASSERT(type_is_integer(dst_type));
--            VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
--            write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
--            break;
--        case HLSL_OP2_MAX:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_INT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
--                    break;
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
--                    break;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
--            }
--            break;
-+            vkd3d_unreachable();
-+    }
--        case HLSL_OP2_MIN:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
--                    break;
-+    if (texel_offset)
-+    {
-+        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
-+        {
-+            hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
-+                    "Offset must resolve to integer literal in the range -8 to 7.");
-+            return;
-+        }
-+    }
--                case HLSL_TYPE_INT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
--                    break;
-+    sm4_dst_from_node(&instr.dsts[0], dst);
-+    instr.dst_count = 1;
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
--                    break;
-+    sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
-+    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
-+    sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr);
-+    instr.src_count = 3;
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
--            }
--            break;
-+    if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD
-+           || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS)
-+    {
-+        sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL);
-+        ++instr.src_count;
-+    }
-+    else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
-+    {
-+        sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL);
-+        sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL);
-+        instr.src_count += 2;
-+    }
-+    else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP
-+            || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ)
-+    {
-+        sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL);
-+        ++instr.src_count;
-+    }
--        case HLSL_OP2_MOD:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
--                    break;
-+    write_sm4_instruction(tpf, &instr);
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
--            }
--            break;
-+static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-+    const struct hlsl_deref *resource = &load->resource;
-+    const struct hlsl_ir_node *dst = &load->node;
-+    struct sm4_instruction instr;
--        case HLSL_OP2_MUL:
--            switch (dst_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
--                    break;
-+    VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    /* Using IMUL instead of UMUL because we're taking the low
--                     * bits, and the native compiler generates IMUL. */
--                    write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2);
--                    break;
-+    memset(&instr, 0, sizeof(instr));
-+    instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO;
-+    if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer);
--            }
--            break;
-+    sm4_dst_from_node(&instr.dsts[0], dst);
-+    instr.dst_count = 1;
--        case HLSL_OP2_NEQUAL:
--        {
--            const struct hlsl_type *src_type = arg1->data_type;
-+    sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr);
-+    instr.src_count = 1;
--            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-+    write_sm4_instruction(tpf, &instr);
--            switch (src_type->e.numeric.type)
--            {
--                case HLSL_TYPE_FLOAT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2);
--                    break;
-+static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
-+    const struct hlsl_deref *resource = &load->resource;
-+    const struct hlsl_ir_node *dst = &load->node;
-+    struct sm4_instruction instr;
--                case HLSL_TYPE_BOOL:
--                case HLSL_TYPE_INT:
--                case HLSL_TYPE_UINT:
--                    write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2);
--                    break;
-+    if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER
-+            || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
-+    {
-+        hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers.");
-+        return;
-+    }
--                default:
--                    hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
--                            debug_hlsl_type(tpf->ctx, src_type));
--                    break;
--            }
--            break;
--        }
-+    VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT);
--        case HLSL_OP2_RSHIFT:
--            VKD3D_ASSERT(type_is_integer(dst_type));
--            VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
--            write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR,
--                    &expr->node, arg1, arg2);
--            break;
-+    memset(&instr, 0, sizeof(instr));
-+    instr.opcode = VKD3D_SM4_OP_RESINFO;
-+    if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT)
-+        instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
--        case HLSL_OP3_TERNARY:
--            write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3);
--            break;
-+    sm4_dst_from_node(&instr.dsts[0], dst);
-+    instr.dst_count = 1;
--        default:
--            hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
--    }
-+    sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL);
-+    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
-+    instr.src_count = 2;
--    hlsl_release_string_buffer(tpf->ctx, dst_type_string);
-+    write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff)
-+static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
- {
-     struct sm4_instruction instr =
-     {
--        .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ,
-+        .opcode = VKD3D_SM4_OP_IF,
-+        .extra_bits = VKD3D_SM4_CONDITIONAL_NZ,
-         .src_count = 1,
-     };
-@@ -5614,7 +5278,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump)
-+static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump)
- {
-     struct sm4_instruction instr = {0};
-@@ -5650,57 +5314,7 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju
-     write_sm4_instruction(tpf, &instr);
- }
--/* Does this variable's data come directly from the API user, rather than being
-- * temporary or from a previous shader stage?
-- * I.e. is it a uniform or VS input? */
--static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var)
--    if (var->is_uniform)
--        return true;
--    return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX;
--static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load)
--    const struct hlsl_type *type = load->node.data_type;
--    struct sm4_instruction instr;
--    memset(&instr, 0, sizeof(instr));
--    sm4_dst_from_node(&instr.dsts[0], &load->node);
--    instr.dst_count = 1;
--    VKD3D_ASSERT(hlsl_is_numeric_type(type));
--    if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var))
--    {
--        struct hlsl_constant_value value;
--        /* Uniform bools can be specified as anything, but internal bools always
--         * have 0 for false and ~0 for true. Normalize that here. */
--        instr.opcode = VKD3D_SM4_OP_MOVC;
--        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
--        memset(&value, 0xff, sizeof(value));
--        sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask);
--        memset(&value, 0, sizeof(value));
--        sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask);
--        instr.src_count = 3;
--    }
--    else
--    {
--        instr.opcode = VKD3D_SM4_OP_MOV;
--        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
--        instr.src_count = 1;
--    }
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop)
-+static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop)
- {
-     struct sm4_instruction instr =
-     {
-@@ -5715,10 +5329,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst,
-+static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
-         const struct hlsl_deref *resource, const struct hlsl_deref *sampler,
-         const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset)
- {
-+    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-     struct vkd3d_shader_src_param *src;
-     struct sm4_instruction instr;
-@@ -5735,7 +5350,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_
-     {
-         if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
-         {
--            if (tpf->ctx->profile->major_version < 5)
-+            if (!vkd3d_shader_ver_ge(version, 5, 0))
-             {
-                 hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
-                     "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
-@@ -5756,7 +5371,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load)
-+static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load)
- {
-     const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
-     const struct hlsl_ir_node *sample_index = load->sample_index.node;
-@@ -5825,45 +5440,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h
-     }
- }
--static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store)
--    struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource);
--    if (!store->resource.var->is_uniform)
--    {
--        hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable.");
--        return;
--    }
--    if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
--    {
--        hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented.");
--        return;
--    }
--    write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node);
--static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store)
--    const struct hlsl_ir_node *rhs = store->rhs.node;
--    struct sm4_instruction instr;
--    uint32_t writemask;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM4_OP_MOV;
--    sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr);
--    instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask);
--    instr.dst_count = 1;
--    sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask);
--    instr.src_count = 1;
--    write_sm4_instruction(tpf, &instr);
--static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s)
-+static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s)
- {
-     const struct hlsl_ir_node *selector = s->selector.node;
-     struct hlsl_ir_switch_case *c;
-@@ -5903,30 +5480,176 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle)
-+static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
- {
--    unsigned int hlsl_swizzle;
--    struct sm4_instruction instr;
--    uint32_t writemask;
-+    const struct vkd3d_sm4_opcode_info *info;
-+    struct sm4_instruction instr = {0};
-+    unsigned int dst_count, src_count;
--    memset(&instr, 0, sizeof(instr));
--    instr.opcode = VKD3D_SM4_OP_MOV;
-+    info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode);
-+    VKD3D_ASSERT(info);
--    sm4_dst_from_node(&instr.dsts[0], &swizzle->node);
--    instr.dst_count = 1;
-+    dst_count = opcode_info_get_dst_count(info);
-+    src_count = opcode_info_get_src_count(info);
--    sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node);
--    hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask),
--            swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask);
--    instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle);
--    instr.src_count = 1;
-+    if (ins->dst_count != dst_count)
-+    {
-+        ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n",
-+                ins->dst_count, ins->opcode, dst_count);
-+        tpf->result = VKD3D_ERROR_INVALID_SHADER;
-+        return;
-+    }
-+    if (ins->src_count != src_count)
-+    {
-+        ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n",
-+                ins->src_count, ins->opcode, src_count);
-+        tpf->result = VKD3D_ERROR_INVALID_SHADER;
-+        return;
-+    }
-+    instr.opcode = info->opcode;
-+    instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
-+    instr.dst_count = ins->dst_count;
-+    instr.src_count = ins->src_count;
-+    for (unsigned int i = 0; i < ins->dst_count; ++i)
-+    {
-+        instr.dsts[i] = ins->dst[i];
-+        if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE)
-+        {
-+            /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. */
-+            VKD3D_ASSERT(ins->dst_count == 1);
-+            instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE;
-+        }
-+    }
-+    for (unsigned int i = 0; i < ins->src_count; ++i)
-+        instr.srcs[i] = ins->src[i];
-     write_sm4_instruction(tpf, &instr);
- }
--static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block)
-+static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
-+    switch (ins->opcode)
-+    {
-+        case VKD3DSIH_DCL_TEMPS:
-+            tpf_dcl_temps(tpf, ins->declaration.count);
-+            break;
-+            tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp);
-+            break;
-+        case VKD3DSIH_DCL_INPUT:
-+            tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0);
-+            break;
-+        case VKD3DSIH_DCL_INPUT_PS:
-+            tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags);
-+            break;
-+        case VKD3DSIH_DCL_INPUT_PS_SGV:
-+            tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0);
-+            break;
-+        case VKD3DSIH_DCL_INPUT_PS_SIV:
-+            tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, &ins->declaration.register_semantic, ins->flags);
-+            break;
-+        case VKD3DSIH_DCL_INPUT_SGV:
-+            tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0);
-+            break;
-+        case VKD3DSIH_DCL_INPUT_SIV:
-+            tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0);
-+            break;
-+        case VKD3DSIH_DCL_OUTPUT:
-+            tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, &ins->declaration.dst, 0);
-+            break;
-+        case VKD3DSIH_DCL_OUTPUT_SIV:
-+            tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0);
-+            break;
-+        case VKD3DSIH_ADD:
-+        case VKD3DSIH_AND:
-+        case VKD3DSIH_DIV:
-+        case VKD3DSIH_DP2:
-+        case VKD3DSIH_DP3:
-+        case VKD3DSIH_DP4:
-+        case VKD3DSIH_DSX:
-+        case VKD3DSIH_DSX_COARSE:
-+        case VKD3DSIH_DSX_FINE:
-+        case VKD3DSIH_DSY:
-+        case VKD3DSIH_DSY_COARSE:
-+        case VKD3DSIH_DSY_FINE:
-+        case VKD3DSIH_EQO:
-+        case VKD3DSIH_EXP:
-+        case VKD3DSIH_F16TOF32:
-+        case VKD3DSIH_F32TOF16:
-+        case VKD3DSIH_FRC:
-+        case VKD3DSIH_FTOI:
-+        case VKD3DSIH_FTOU:
-+        case VKD3DSIH_GEO:
-+        case VKD3DSIH_IADD:
-+        case VKD3DSIH_IEQ:
-+        case VKD3DSIH_IGE:
-+        case VKD3DSIH_ILT:
-+        case VKD3DSIH_IMAD:
-+        case VKD3DSIH_IMAX:
-+        case VKD3DSIH_IMIN:
-+        case VKD3DSIH_IMUL:
-+        case VKD3DSIH_INE:
-+        case VKD3DSIH_INEG:
-+        case VKD3DSIH_ISHL:
-+        case VKD3DSIH_ISHR:
-+        case VKD3DSIH_ITOF:
-+        case VKD3DSIH_LOG:
-+        case VKD3DSIH_LTO:
-+        case VKD3DSIH_MAD:
-+        case VKD3DSIH_MAX:
-+        case VKD3DSIH_MIN:
-+        case VKD3DSIH_MOV:
-+        case VKD3DSIH_MOVC:
-+        case VKD3DSIH_MUL:
-+        case VKD3DSIH_NEU:
-+        case VKD3DSIH_NOT:
-+        case VKD3DSIH_OR:
-+        case VKD3DSIH_RCP:
-+        case VKD3DSIH_ROUND_NE:
-+        case VKD3DSIH_ROUND_NI:
-+        case VKD3DSIH_ROUND_PI:
-+        case VKD3DSIH_ROUND_Z:
-+        case VKD3DSIH_RSQ:
-+        case VKD3DSIH_SAMPLE_INFO:
-+        case VKD3DSIH_SINCOS:
-+        case VKD3DSIH_SQRT:
-+        case VKD3DSIH_STORE_RAW:
-+        case VKD3DSIH_STORE_UAV_TYPED:
-+        case VKD3DSIH_UDIV:
-+        case VKD3DSIH_UGE:
-+        case VKD3DSIH_ULT:
-+        case VKD3DSIH_UMAX:
-+        case VKD3DSIH_UMIN:
-+        case VKD3DSIH_USHR:
-+        case VKD3DSIH_UTOF:
-+        case VKD3DSIH_XOR:
-+            tpf_simple_instruction(tpf, ins);
-+            break;
-+        default:
-+            vkd3d_unreachable();
-+            break;
-+    }
-+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block)
- {
-     const struct hlsl_ir_node *instr;
-+    unsigned int vsir_instr_idx;
-     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
-     {
-@@ -5952,10 +5675,6 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
-             case HLSL_IR_CONSTANT:
-                 vkd3d_unreachable();
--            case HLSL_IR_EXPR:
--                write_sm4_expr(tpf, hlsl_ir_expr(instr));
--                break;
-             case HLSL_IR_IF:
-                 write_sm4_if(tpf, hlsl_ir_if(instr));
-                 break;
-@@ -5964,32 +5683,21 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
-                 write_sm4_jump(tpf, hlsl_ir_jump(instr));
-                 break;
--            case HLSL_IR_LOAD:
--                write_sm4_load(tpf, hlsl_ir_load(instr));
--                break;
-             case HLSL_IR_RESOURCE_LOAD:
-                 write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr));
-                 break;
--            case HLSL_IR_RESOURCE_STORE:
--                write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr));
--                break;
-             case HLSL_IR_LOOP:
-                 write_sm4_loop(tpf, hlsl_ir_loop(instr));
-                 break;
--            case HLSL_IR_STORE:
--                write_sm4_store(tpf, hlsl_ir_store(instr));
--                break;
-             case HLSL_IR_SWITCH:
-                 write_sm4_switch(tpf, hlsl_ir_switch(instr));
-                 break;
--            case HLSL_IR_SWIZZLE:
--                write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr));
-+            case HLSL_IR_VSIR_INSTRUCTION_REF:
-+                vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx;
-+                tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]);
-                 break;
-             default:
-@@ -5998,18 +5706,26 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
-     }
- }
--static void write_sm4_shdr(struct hlsl_ctx *ctx,
--        const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc)
-+static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func)
- {
--    const struct hlsl_profile_info *profile = ctx->profile;
-+    if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE)
-+        tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size);
-+    write_sm4_block(tpf, &func->body);
-+    write_sm4_ret(tpf);
-+static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func)
-+    const struct vkd3d_shader_version *version = &tpf->program->shader_version;
-     struct vkd3d_bytecode_buffer buffer = {0};
-     struct extern_resource *extern_resources;
-     unsigned int extern_resources_count, i;
-     const struct hlsl_buffer *cbuffer;
--    const struct hlsl_scope *scope;
--    const struct hlsl_ir_var *var;
-+    struct hlsl_ctx *ctx = tpf->ctx;
-     size_t token_count_position;
--    struct tpf_writer tpf;
-+    uint32_t global_flags = 0;
-     static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
-     {
-@@ -6024,17 +5740,54 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
-         VKD3D_SM4_LIB,
-     };
--    tpf_writer_init(&tpf, ctx, &buffer);
-+    tpf->buffer = &buffer;
-     extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
--    put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type]));
-+    put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type]));
-     token_count_position = put_u32(&buffer, 0);
-+    if (version->major == 4)
-+    {
-+        for (i = 0; i < extern_resources_count; ++i)
-+        {
-+            const struct extern_resource *resource = &extern_resources[i];
-+            const struct hlsl_type *type = resource->component_type;
-+            if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
-+            {
-+                global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
-+                break;
-+            }
-+        }
-+    }
-+    if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
-+        global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
-+    if (global_flags)
-+        write_sm4_dcl_global_flags(tpf, global_flags);
-+    if (version->type == VKD3D_SHADER_TYPE_HULL)
-+    {
-+        tpf_write_hs_decls(tpf);
-+        tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */
-+        tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count);
-+        tpf_write_dcl_tessellator_domain(tpf, ctx->domain);
-+        tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning);
-+        tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive);
-+    }
-+    else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
-+    {
-+        tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */
-+        tpf_write_dcl_tessellator_domain(tpf, ctx->domain);
-+    }
-     LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
-     {
-         if (cbuffer->reg.allocated)
--            write_sm4_dcl_constant_buffer(&tpf, cbuffer);
-+            write_sm4_dcl_constant_buffer(tpf, cbuffer);
-     }
-     for (i = 0; i < extern_resources_count; ++i)
-@@ -6042,59 +5795,37 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
-         const struct extern_resource *resource = &extern_resources[i];
-         if (resource->regset == HLSL_REGSET_SAMPLERS)
--            write_sm4_dcl_samplers(&tpf, resource);
-+            write_sm4_dcl_samplers(tpf, resource);
-         else if (resource->regset == HLSL_REGSET_TEXTURES)
--            write_sm4_dcl_textures(&tpf, resource, false);
-+            write_sm4_dcl_textures(tpf, resource, false);
-         else if (resource->regset == HLSL_REGSET_UAVS)
--            write_sm4_dcl_textures(&tpf, resource, true);
--    }
--    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
--    {
--        if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write))
--            write_sm4_dcl_semantic(&tpf, var);
-+            write_sm4_dcl_textures(tpf, resource, true);
-     }
--    if (profile->type == VKD3D_SHADER_TYPE_COMPUTE)
--        write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
-+    if (version->type == VKD3D_SHADER_TYPE_HULL)
-+        tpf_write_hs_control_point_phase(tpf);
--    if (ctx->temp_count)
--        write_sm4_dcl_temps(&tpf, ctx->temp_count);
-+    tpf_write_shader_function(tpf, entry_func);
--    LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
-+    if (version->type == VKD3D_SHADER_TYPE_HULL)
-     {
--        LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
--        {
--            if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
--                continue;
--            if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
--                continue;
--            if (var->indexable)
--            {
--                unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
--                unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
--                write_sm4_dcl_indexable_temp(&tpf, id, size, 4);
--            }
--        }
-+        tpf_write_hs_fork_phase(tpf);
-+        tpf_write_shader_function(tpf, ctx->patch_constant_func);
-     }
--    write_sm4_block(&tpf, &entry_func->body);
--    write_sm4_ret(&tpf);
-     set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
--    add_section(ctx, dxbc, TAG_SHDR, &buffer);
-+    add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer);
-+    tpf->buffer = NULL;
-     sm4_free_extern_resources(extern_resources, extern_resources_count);
- }
--static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-+static void tpf_write_sfi0(struct tpf_compiler *tpf)
- {
-     struct extern_resource *extern_resources;
-     unsigned int extern_resources_count;
-+    struct hlsl_ctx *ctx = tpf->ctx;
-     uint64_t *flags;
-     flags = vkd3d_calloc(1, sizeof(*flags));
-@@ -6110,29 +5841,101 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
-     /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE,
--    if (flags)
--        dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags));
-+    if (*flags)
-+        dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags));
-     else
-         vkd3d_free(flags);
- }
--int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
-+static void tpf_write_stat(struct tpf_compiler *tpf)
- {
--    struct dxbc_writer dxbc;
-+    struct vkd3d_bytecode_buffer buffer = {0};
-+    const struct sm4_stat *stat = tpf->stat;
-+    struct hlsl_ctx *ctx = tpf->ctx;
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]);
-+    put_u32(&buffer, 0); /* Def count */
-+    put_u32(&buffer, 0); /* DCL count */
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_FLOAT]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_INT]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_UINT]);
-+    put_u32(&buffer, 0); /* Static flow control count */
-+    put_u32(&buffer, 0); /* Dynamic flow control count */
-+    put_u32(&buffer, 0); /* Macro instruction count */
-+    put_u32(&buffer, 0); /* Temp array count */
-+    put_u32(&buffer, 0); /* Array instr count */
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_CUT]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_EMIT]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_LOAD]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_C]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_BIAS]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_GRAD]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_MOV]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_MOVC]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_CONV]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_BITWISE]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_INPUT_PRIMITIVE]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_OUTPUT_TOPOLOGY]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_VERTICES_OUT]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_GATHER]);
-+    put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]);
-+    put_u32(&buffer, 0); /* Sample frequency */
-+    if (hlsl_version_ge(ctx, 5, 0))
-+    {
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_OUTPUT_PRIMITIVE]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_PARTITIONING]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_DOMAIN]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_BARRIER]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_ATOMIC]);
-+        put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]);
-+    }
-+    add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer);
-+/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving
-+ * data from the other parameters instead, so they can be removed from the
-+ * arguments and this function can be independent of HLSL structs.  */
-+int tpf_compile(struct vsir_program *program, uint64_t config_flags,
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context,
-+        struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
-+    enum vkd3d_shader_type shader_type = program->shader_version.type;
-+    struct tpf_compiler tpf = {0};
-+    struct sm4_stat stat = {0};
-     size_t i;
-     int ret;
--    dxbc_writer_init(&dxbc);
--    write_sm4_signature(ctx, &dxbc, false);
--    write_sm4_signature(ctx, &dxbc, true);
--    write_sm4_rdef(ctx, &dxbc);
--    write_sm4_shdr(ctx, entry_func, &dxbc);
--    write_sm4_sfi0(ctx, &dxbc);
--    if (!(ret = ctx->result))
--        ret = dxbc_writer_write(&dxbc, out);
--    for (i = 0; i < dxbc.section_count; ++i)
--        vkd3d_shader_free_shader_code(&dxbc.sections[i].data);
-+    tpf.ctx = ctx;
-+    tpf.program = program;
-+    tpf.buffer = NULL;
-+    tpf.stat = &stat;
-+    init_sm4_lookup_tables(&tpf.lookup);
-+    dxbc_writer_init(&tpf.dxbc);
-+    tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN);
-+    tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN);
-+    if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN)
-+        tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG);
-+    write_sm4_rdef(ctx, &tpf.dxbc);
-+    tpf_write_shdr(&tpf, entry_func);
-+    tpf_write_sfi0(&tpf);
-+    tpf_write_stat(&tpf);
-+    ret = VKD3D_OK;
-+    if (ctx->result)
-+        ret = ctx->result;
-+    if (tpf.result)
-+        ret = tpf.result;
-+    if (!ret)
-+        ret = dxbc_writer_write(&tpf.dxbc, out);
-+    for (i = 0; i < tpf.dxbc.section_count; ++i)
-+        vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data);
-     return ret;
- }
-diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
-index 306c1ca0dd8..db61eec8f28 100644
---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
-+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
-@@ -23,6 +23,8 @@
- #include <stdio.h>
- #include <math.h>
- static inline int char_to_int(char c)
- {
-     if ('0' <= c && c <= '9')
-@@ -443,30 +445,57 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char
-     bytecode_set_bytes(buffer, offset, string, length);
- }
--static void vkd3d_shader_dump_blob(const char *path, const char *profile,
--        const char *suffix, const void *data, size_t size)
-+struct shader_dump_data
- {
--    static unsigned int shader_id = 0;
-+    uint8_t checksum[16];
-+    const char *path;
-+    const char *profile;
-+    const char *source_suffix;
-+    const char *target_suffix;
-+static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data,
-+        const void *data, size_t size, bool source)
-+    static const char hexadecimal_digits[] = "0123456789abcdef";
-+    const uint8_t *checksum = dump_data->checksum;
-+    char str_checksum[33];
-+    unsigned int pos = 0;
-     char filename[1024];
--    unsigned int id;
-+    unsigned int i;
-     FILE *f;
--    id = vkd3d_atomic_increment_u32(&shader_id) - 1;
-+    if (!dump_data->path)
-+        return;
-+    for (i = 0; i < ARRAY_SIZE(dump_data->checksum); ++i)
-+    {
-+        str_checksum[2 * i] = hexadecimal_digits[checksum[i] >> 4];
-+        str_checksum[2 * i + 1] = hexadecimal_digits[checksum[i] & 0xf];
-+    }
-+    str_checksum[32] = '\0';
-+    pos = snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s", dump_data->path, str_checksum);
--    if (profile)
--        snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u-%s.%s", path, id, profile, suffix);
-+    if (dump_data->profile)
-+        pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile);
-+    if (source)
-+        pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix);
-     else
--        snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u.%s", path, id, suffix);
-+        pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix);
-+    TRACE("Dumping shader to \"%s\".\n", filename);
-     if ((f = fopen(filename, "wb")))
-     {
-         if (fwrite(data, 1, size, f) != size)
--            ERR("Failed to write shader to %s.\n", filename);
-+            WARN("Failed to write shader to %s.\n", filename);
-         if (fclose(f))
--            ERR("Failed to close stream %s.\n", filename);
-+            WARN("Failed to close stream %s.\n", filename);
-     }
-     else
-     {
--        ERR("Failed to open %s for dumping shader.\n", filename);
-+        WARN("Failed to open %s for dumping shader.\n", filename);
-     }
- }
-@@ -488,37 +517,61 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t
-     }
- }
--void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info)
-+static const char *shader_get_target_type_suffix(enum vkd3d_shader_target_type type)
-+    switch (type)
-+    {
-+            return "spv";
-+            return "spv.s";
-+        case VKD3D_SHADER_TARGET_D3D_ASM:
-+            return "d3d.s";
-+            return "d3dbc";
-+            return "dxbc";
-+        case VKD3D_SHADER_TARGET_GLSL:
-+            return "glsl";
-+        case VKD3D_SHADER_TARGET_FX:
-+            return "fx";
-+        case VKD3D_SHADER_TARGET_MSL:
-+            return "msl";
-+        default:
-+            FIXME("Unhandled target type %#x.\n", type);
-+            return "bin";
-+    }
-+static void fill_shader_dump_data(const struct vkd3d_shader_compile_info *compile_info,
-+        struct shader_dump_data *data)
- {
--    const struct vkd3d_shader_code *shader = &compile_info->source;
--    const struct vkd3d_shader_hlsl_source_info *hlsl_source_info;
--    const struct hlsl_profile_info *profile;
--    const char *profile_name = NULL;
-     static bool enabled = true;
--    const char *path;
-+    data->path = NULL;
-     if (!enabled)
-         return;
--    if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
-+    if (!(data->path = getenv("VKD3D_SHADER_DUMP_PATH")))
-     {
-         enabled = false;
-         return;
-     }
-+    data->profile = NULL;
-     if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL)
-     {
--        if (!(hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO)))
--            return;
--        if (!(profile = hlsl_get_target_info(hlsl_source_info->profile)))
--            return;
-+        const struct vkd3d_shader_hlsl_source_info *hlsl_source_info;
--        profile_name = profile->name;
-+        if ((hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO)))
-+            data->profile = hlsl_source_info->profile;
-     }
--    vkd3d_shader_dump_blob(path, profile_name, shader_get_source_type_suffix(compile_info->source_type),
--            shader->code, shader->size);
-+    vkd3d_compute_md5(compile_info->source.code, compile_info->source.size,
-+            (uint32_t *)data->checksum, VKD3D_MD5_STANDARD);
-+    data->source_suffix = shader_get_source_type_suffix(compile_info->source_type);
-+    data->target_suffix = shader_get_target_type_suffix(compile_info->target_type);
- }
- static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info)
-@@ -627,6 +680,53 @@ static int vkd3d_shader_validate_compile_info(const struct vkd3d_shader_compile_
-     return VKD3D_OK;
- }
-+static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags,
-+        struct vkd3d_shader_message_context *message_context, struct vsir_program *program)
-+    enum vkd3d_result ret;
-+    switch (compile_info->source_type)
-+    {
-+            ret = d3dbc_parse(compile_info, config_flags, message_context, program);
-+            break;
-+            ret = tpf_parse(compile_info, config_flags, message_context, program);
-+            break;
-+            ret = dxil_parse(compile_info, config_flags, message_context, program);
-+            break;
-+        default:
-+            ERR("Unsupported source type %#x.\n", compile_info->source_type);
-+            ret = VKD3D_ERROR_INVALID_ARGUMENT;
-+            break;
-+    }
-+    if (ret < 0)
-+    {
-+        WARN("Failed to parse shader.\n");
-+        return ret;
-+    }
-+    if ((ret = vsir_program_validate(program, config_flags, compile_info->source_name, message_context)) < 0)
-+    {
-+        WARN("Failed to validate shader after parsing, ret %d.\n", ret);
-+        if (TRACE_ON())
-+            vsir_program_trace(program);
-+        vsir_program_cleanup(program);
-+        return ret;
-+    }
-+    if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE)
-+        ret = vsir_program_transform_early(program, config_flags, compile_info, message_context);
-+    return ret;
- void vkd3d_shader_free_messages(char *messages)
- {
-     TRACE("messages %p.\n", messages);
-@@ -707,6 +807,9 @@ struct vkd3d_shader_scan_context
-     struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info;
-     size_t combined_samplers_size;
-+    enum vkd3d_shader_tessellator_output_primitive output_primitive;
-+    enum vkd3d_shader_tessellator_partitioning partitioning;
- };
- static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context,
-@@ -1164,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte
-                     instruction->declaration.structured_resource.byte_stride, false, instruction->flags);
-             break;
-+            context->output_primitive = instruction->declaration.tessellator_output_primitive;
-+            break;
-+            context->partitioning = instruction->declaration.tessellator_partitioning;
-+            break;
-         case VKD3DSIH_IF:
-         case VKD3DSIH_IFC:
-             cf_info = vkd3d_shader_scan_push_cf_info(context);
-@@ -1404,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
-         struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1)
- {
-     struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info;
-+    struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info;
-     struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0};
-     struct vkd3d_shader_scan_descriptor_info *descriptor_info;
-     struct vkd3d_shader_scan_signature_info *signature_info;
-@@ -1432,11 +1542,13 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
-             descriptor_info1 = &local_descriptor_info1;
-     }
-+    tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO);
-     vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info,
-             descriptor_info1, combined_sampler_info, message_context);
-     if (TRACE_ON())
--        vkd3d_shader_trace(program);
-+        vsir_program_trace(program);
-     for (i = 0; i < program->instructions.count; ++i)
-     {
-@@ -1475,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
-     if (!ret && descriptor_info)
-         ret = convert_descriptor_info(descriptor_info, descriptor_info1);
-+    if (!ret && tessellation_info)
-+    {
-+        tessellation_info->output_primitive = context.output_primitive;
-+        tessellation_info->partitioning = context.partitioning;
-+    }
-     if (ret < 0)
-     {
-         if (combined_sampler_info)
-@@ -1497,6 +1615,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
- int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages)
- {
-     struct vkd3d_shader_message_context message_context;
-+    struct shader_dump_data dump_data;
-     int ret;
-     TRACE("compile_info %p, messages %p.\n", compile_info, messages);
-@@ -1511,7 +1630,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char
-     vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
--    vkd3d_shader_dump_shader(compile_info);
-+    fill_shader_dump_data(compile_info, &dump_data);
-+    vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true);
-     if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL)
-     {
-@@ -1523,31 +1643,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char
-         uint64_t config_flags = vkd3d_shader_init_config_flags();
-         struct vsir_program program;
--        switch (compile_info->source_type)
--        {
--            case VKD3D_SHADER_SOURCE_D3D_BYTECODE:
--                ret = d3dbc_parse(compile_info, config_flags, &message_context, &program);
--                break;
--            case VKD3D_SHADER_SOURCE_DXBC_TPF:
--                ret = tpf_parse(compile_info, config_flags, &message_context, &program);
--                break;
--            case VKD3D_SHADER_SOURCE_DXBC_DXIL:
--                ret = dxil_parse(compile_info, config_flags, &message_context, &program);
--                break;
--            default:
--                ERR("Unsupported source type %#x.\n", compile_info->source_type);
--                ret = VKD3D_ERROR_INVALID_ARGUMENT;
--                break;
--        }
--        if (ret < 0)
--        {
--            WARN("Failed to parse shader.\n");
--        }
--        else
-+        if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program)))
-         {
-             ret = vsir_program_scan(&program, compile_info, &message_context, NULL);
-             vsir_program_cleanup(&program);
-@@ -1565,6 +1661,7 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
-         const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
-         struct vkd3d_shader_message_context *message_context)
- {
-+    struct vkd3d_shader_scan_combined_resource_sampler_info combined_sampler_info;
-     struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info;
-     struct vkd3d_shader_compile_info scan_info;
-     int ret;
-@@ -1578,9 +1675,14 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
-             break;
-         case VKD3D_SHADER_TARGET_GLSL:
-+            combined_sampler_info.next = scan_info.next;
-+            scan_info.next = &combined_sampler_info;
-             if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0)
-                 return ret;
--            ret = glsl_compile(program, config_flags, compile_info, out, message_context);
-+            ret = glsl_compile(program, config_flags, &scan_descriptor_info,
-+                    &combined_sampler_info, compile_info, out, message_context);
-+            vkd3d_shader_free_scan_combined_resource_sampler_info(&combined_sampler_info);
-             vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info);
-             break;
-@@ -1593,6 +1695,13 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
-             vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info);
-             break;
-+        case VKD3D_SHADER_TARGET_MSL:
-+            if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0)
-+                return ret;
-+            ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, out, message_context);
-+            vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info);
-+            break;
-         default:
-             /* Validation should prevent us from reaching this. */
-             vkd3d_unreachable();
-@@ -1620,6 +1729,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info,
-         struct vkd3d_shader_code *out, char **messages)
- {
-     struct vkd3d_shader_message_context message_context;
-+    struct shader_dump_data dump_data;
-     int ret;
-     TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages);
-@@ -1634,48 +1744,32 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info,
-     vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
--    vkd3d_shader_dump_shader(compile_info);
-+    fill_shader_dump_data(compile_info, &dump_data);
-+    vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true);
-     if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL)
-     {
-         ret = compile_hlsl(compile_info, out, &message_context);
-     }
-+    else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX)
-+    {
-+        ret = fx_parse(compile_info, out, &message_context);
-+    }
-     else
-     {
-         uint64_t config_flags = vkd3d_shader_init_config_flags();
-         struct vsir_program program;
--        switch (compile_info->source_type)
--        {
--            case VKD3D_SHADER_SOURCE_D3D_BYTECODE:
--                ret = d3dbc_parse(compile_info, config_flags, &message_context, &program);
--                break;
--            case VKD3D_SHADER_SOURCE_DXBC_TPF:
--                ret = tpf_parse(compile_info, config_flags, &message_context, &program);
--                break;
--            case VKD3D_SHADER_SOURCE_DXBC_DXIL:
--                ret = dxil_parse(compile_info, config_flags, &message_context, &program);
--                break;
--            default:
--                ERR("Unsupported source type %#x.\n", compile_info->source_type);
--                ret = VKD3D_ERROR_INVALID_ARGUMENT;
--                break;
--        }
--        if (ret < 0)
--        {
--            WARN("Failed to parse shader.\n");
--        }
--        else
-+        if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program)))
-         {
-             ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context);
-             vsir_program_cleanup(&program);
-         }
-     }
-+    if (ret >= 0)
-+        vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false);
-     vkd3d_shader_message_context_trace_messages(&message_context);
-     if (!vkd3d_shader_message_context_copy_messages(&message_context, messages))
-         ret = VKD3D_ERROR_OUT_OF_MEMORY;
-@@ -1777,6 +1871,8 @@ void shader_signature_cleanup(struct shader_signature *signature)
-     }
-     vkd3d_free(signature->elements);
-     signature->elements = NULL;
-+    signature->elements_capacity = 0;
-+    signature->element_count = 0;
- }
- int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
-@@ -1868,6 +1964,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns
- #endif
-     };
-     TRACE("count %p.\n", count);
-@@ -1888,6 +1985,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
- #endif
-     };
-@@ -1923,6 +2023,11 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
-     };
- #endif
-+    static const enum vkd3d_shader_target_type fx_types[] =
-+    {
-+    };
-     TRACE("source_type %#x, count %p.\n", source_type, count);
-     switch (source_type)
-@@ -1945,6 +2050,10 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
-             return dxbc_dxil_types;
- #endif
-+        case VKD3D_SHADER_SOURCE_FX:
-+            *count = ARRAY_SIZE(fx_types);
-+            return fx_types;
-         default:
-             *count = 0;
-             return NULL;
-diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
-index ef66a8ca07a..ad04972b3fb 100644
---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
-+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
-@@ -59,6 +59,9 @@
- #define VKD3D_VEC4_SIZE 4
- #define VKD3D_DVEC2_SIZE 2
- enum vkd3d_shader_error
- {
-     VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE                = 1,
-@@ -80,6 +83,7 @@ enum vkd3d_shader_error
-@@ -152,6 +156,13 @@ enum vkd3d_shader_error
-+    VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN              = 5035,
-@@ -159,8 +170,11 @@ enum vkd3d_shader_error
-     VKD3D_SHADER_ERROR_GLSL_INTERNAL                    = 6000,
-+    VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED                 = 6002,
-     VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF             = 7000,
-@@ -169,6 +183,11 @@ enum vkd3d_shader_error
-     VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY              = 7004,
-@@ -225,8 +244,18 @@ enum vkd3d_shader_error
-     VKD3D_SHADER_ERROR_VSIR_INVALID_GS                  = 9019,
-+    VKD3D_SHADER_ERROR_MSL_INTERNAL                     = 10000,
-+    VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND            = 10001,
-+    VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED               = 11000,
-+    VKD3D_SHADER_ERROR_FX_INVALID_VERSION               = 11001,
-+    VKD3D_SHADER_ERROR_FX_INVALID_DATA                  = 11002,
- };
- enum vkd3d_shader_opcode
-@@ -556,6 +585,8 @@ enum vkd3d_shader_opcode
- };
- enum vkd3d_shader_register_type
-@@ -619,12 +650,20 @@ enum vkd3d_shader_register_type
-     VKD3DSPR_INVALID = ~0u,
- };
-+enum vsir_rastout_register
-+    VSIR_RASTOUT_FOG        = 0x1,
- enum vkd3d_shader_register_precision
- {
-@@ -642,9 +681,6 @@ enum vkd3d_data_type
- {
-@@ -740,7 +776,7 @@ enum vkd3d_shader_interpolation_mode
-     VKD3DSIM_COUNT = 8,
- };
--enum vkd3d_shader_global_flags
-+enum vsir_global_flags
- {
-     VKD3DSGF_REFACTORING_ALLOWED               = 0x01,
-@@ -1042,6 +1078,9 @@ enum vkd3d_shader_input_sysval_semantic
- struct signature_element
- {
-+    /* sort_index is not a property of the signature element, it is just a
-+     * convenience field used to retain the original order in a signature and
-+     * recover it after having permuted the signature itself. */
-     unsigned int sort_index;
-     const char *semantic_name;
-     unsigned int semantic_index;
-@@ -1080,6 +1119,8 @@ static inline bool vsir_sysval_semantic_is_clip_cull(enum vkd3d_shader_sysval_se
- struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature,
-         unsigned int reg_idx, unsigned int write_mask);
-+bool vsir_signature_find_sysval(const struct shader_signature *signature,
-+        enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index);
- void shader_signature_cleanup(struct shader_signature *signature);
- struct dxbc_shader_desc
-@@ -1145,7 +1186,7 @@ struct vkd3d_shader_tgsm_structured
-     bool zero_init;
- };
--struct vkd3d_shader_thread_group_size
-+struct vsir_thread_group_size
- {
-     unsigned int x, y, z;
- };
-@@ -1210,7 +1251,7 @@ struct vkd3d_shader_instruction
-     const struct vkd3d_shader_src_param *predicate;
-     union
-     {
--        enum vkd3d_shader_global_flags global_flags;
-+        enum vsir_global_flags global_flags;
-         struct vkd3d_shader_semantic semantic;
-         struct vkd3d_shader_register_semantic register_semantic;
-         struct vkd3d_shader_primitive_type primitive_type;
-@@ -1224,7 +1265,7 @@ struct vkd3d_shader_instruction
-         struct vkd3d_shader_structured_resource structured_resource;
-         struct vkd3d_shader_tgsm_raw tgsm_raw;
-         struct vkd3d_shader_tgsm_structured tgsm_structured;
--        struct vkd3d_shader_thread_group_size thread_group_size;
-+        struct vsir_thread_group_size thread_group_size;
-         enum vkd3d_tessellator_domain tessellator_domain;
-         enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive;
-         enum vkd3d_shader_tessellator_partitioning tessellator_partitioning;
-@@ -1344,8 +1385,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins
-         struct vkd3d_shader_immediate_constant_buffer *icb);
- bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions,
-         unsigned int dst, unsigned int src);
--struct vkd3d_shader_src_param *instruction_array_create_outpointid_param(
--        struct vkd3d_shader_instruction_array *instructions);
- void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions);
- enum vkd3d_shader_config_flags
-@@ -1353,6 +1392,19 @@ enum vkd3d_shader_config_flags
- };
-+enum vsir_control_flow_type
-+enum vsir_normalisation_level
- struct vsir_program
- {
-     struct vkd3d_shader_version shader_version;
-@@ -1367,11 +1419,19 @@ struct vsir_program
-     bool free_parameters;
-     unsigned int input_control_point_count, output_control_point_count;
-+    struct vsir_thread_group_size thread_group_size;
-     unsigned int flat_constant_count[3];
-     unsigned int block_count;
-     unsigned int temp_count;
-     unsigned int ssa_count;
-+    enum vsir_global_flags global_flags;
-     bool use_vocp;
-+    bool has_point_size;
-+    bool has_point_coord;
-+    uint8_t diffuse_written_mask;
-+    enum vsir_control_flow_type cf_type;
-+    enum vsir_normalisation_level normalisation_level;
-+    enum vkd3d_tessellator_domain tess_domain;
-     const char **block_names;
-     size_t block_name_count;
-@@ -1384,11 +1444,19 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
- const struct vkd3d_shader_parameter1 *vsir_program_get_parameter(
-         const struct vsir_program *program, enum vkd3d_shader_parameter_name name);
- bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info,
--        const struct vkd3d_shader_version *version, unsigned int reserve);
--enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type,
-+        enum vsir_normalisation_level normalisation_level);
-+enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context);
-+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags,
-         const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context);
- enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags,
-         const char *source_name, struct vkd3d_shader_message_context *message_context);
-+struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(
-+        struct vsir_program *program);
-+bool vsir_instruction_init_with_params(struct vsir_program *program,
-+        struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
-+        enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count);
- static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params(
-         struct vsir_program *program, unsigned int count)
-@@ -1417,12 +1485,6 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_pr
- void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser,
-         enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4);
--static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags)
--    return vsir_program_validate(parser->program, config_flags,
--            parser->location.source_name, parser->message_context);
- struct vkd3d_shader_descriptor_info1
- {
-     enum vkd3d_shader_descriptor_type type;
-@@ -1445,7 +1507,7 @@ struct vkd3d_shader_scan_descriptor_info1
-     unsigned int descriptor_count;
- };
--void vkd3d_shader_trace(const struct vsir_program *program);
-+void vsir_program_trace(const struct vsir_program *program);
- const char *shader_get_type_prefix(enum vkd3d_shader_type type);
-@@ -1465,6 +1527,7 @@ enum vsir_asm_flags
- {
- };
- enum vkd3d_result d3d_asm_compile(const struct vsir_program *program,
-@@ -1549,18 +1612,30 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st
- void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location,
-         enum vkd3d_shader_error error, const char *format, va_list args);
--void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info);
- uint64_t vkd3d_shader_init_config_flags(void);
- void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function);
- #define vkd3d_shader_trace_text(text, size) \
-         vkd3d_shader_trace_text_(text, size, __FUNCTION__)
-+bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name,
-+        unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg);
-+bool sm1_usage_from_semantic_name(const char *semantic_name,
-+        uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx);
-+bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
-+        const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx);
-+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg);
-+bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic,
-+        const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain,
-+        const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func);
- int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags,
-         struct vkd3d_shader_message_context *message_context, struct vsir_program *program);
- int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags,
-         struct vkd3d_shader_message_context *message_context, struct vsir_program *program);
- int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags,
-         struct vkd3d_shader_message_context *message_context, struct vsir_program *program);
-+int fx_parse(const struct vkd3d_shader_compile_info *compile_info,
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
- void free_dxbc_shader_desc(struct dxbc_shader_desc *desc);
-@@ -1570,8 +1645,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
-         struct vkd3d_shader_message_context *message_context, struct shader_signature *signature);
- int glsl_compile(struct vsir_program *program, uint64_t config_flags,
--        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
--        struct vkd3d_shader_message_context *message_context);
-+        const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
-+        const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info,
-+        const struct vkd3d_shader_compile_info *compile_info,
-+        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
-@@ -1580,7 +1657,18 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags,
-         const struct vkd3d_shader_compile_info *compile_info,
-         struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
--void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]);
-+int msl_compile(struct vsir_program *program, uint64_t config_flags,
-+        const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info,
-+        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
-+        struct vkd3d_shader_message_context *message_context);
-+enum vkd3d_md5_variant
-+    VKD3D_MD5_DXBC,
-+void vkd3d_compute_md5(const void *dxbc, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant);
- int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info,
-         struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
-@@ -1853,7 +1941,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain,
- #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t))
- #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t)
- struct dxbc_writer
- {
-diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c
-index dcc7690876f..a55a97f6f2f 100644
---- a/libs/vkd3d/libs/vkd3d/command.c
-+++ b/libs/vkd3d/libs/vkd3d/command.c
-@@ -19,6 +19,7 @@
-  */
- #include "vkd3d_private.h"
-+#include <math.h>
- static void d3d12_fence_incref(struct d3d12_fence *fence);
- static void d3d12_fence_decref(struct d3d12_fence *fence);
-@@ -2004,6 +2005,8 @@ static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *li
-         vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size,
-                 state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views));
-+        memset(bindings->vk_uav_counter_views, 0,
-+                state->uav_counters.binding_count * sizeof(*bindings->vk_uav_counter_views));
-         bindings->uav_counters_dirty = true;
-     }
- }
-@@ -2451,6 +2454,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL
-     }
-     list->is_recording = false;
-+    list->has_depth_bounds = false;
-     if (!list->is_valid)
-     {
-@@ -2479,7 +2483,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list,
-     list->fb_layer_count = 0;
-     list->xfb_enabled = false;
-+    list->has_depth_bounds = false;
-     list->is_predicated = false;
-     list->current_framebuffer = VK_NULL_HANDLE;
-@@ -2793,39 +2797,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des
-             /* We use separate bindings for buffer and texture SRVs/UAVs.
-              * See d3d12_root_signature_init(). For unbounded ranges the
-              * descriptors exist in two consecutive sets, otherwise they occur
--             * in pairs in one set. */
--            if (range->descriptor_count == UINT_MAX)
--            {
--                if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
--                        && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)
--                {
--                    vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1];
--                    vk_descriptor_write->dstBinding = 0;
--                }
--            }
--            else
--            {
--                if (!use_array)
--                    vk_descriptor_write->dstBinding = vk_binding + 2 * index;
--                if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
--                        && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)
--                    ++vk_descriptor_write->dstBinding;
--            }
-+             * as consecutive ranges within a set. */
-             if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
-                     || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)
-             {
-                 vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view;
-+                break;
-+            }
-+            if (range->descriptor_count == UINT_MAX)
-+            {
-+                vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1];
-+                vk_descriptor_write->dstBinding = 0;
-             }
-             else
-             {
--                vk_image_info->sampler = VK_NULL_HANDLE;
--                vk_image_info->imageView = u.view->v.u.vk_image_view;
--                vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV
--                vk_descriptor_write->pImageInfo = vk_image_info;
-+                vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count;
-             }
-+            vk_image_info->sampler = VK_NULL_HANDLE;
-+            vk_image_info->imageView = u.view->v.u.vk_image_view;
-+            vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV
-+            vk_descriptor_write->pImageInfo = vk_image_info;
-             break;
-@@ -3078,7 +3073,7 @@ done:
-     vkd3d_free(vk_descriptor_writes);
- }
--static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list,
-+static void d3d12_command_list_update_virtual_descriptors(struct d3d12_command_list *list,
-         enum vkd3d_pipeline_bind_point bind_point)
- {
-     struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
-@@ -3210,6 +3205,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list)
- static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap)
- {
-+    if (!list->device->use_vk_heaps)
-+        return;
-     if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap))
-     {
-         if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps))
-@@ -3296,6 +3294,15 @@ static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list
-     d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap);
- }
-+static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list,
-+        enum vkd3d_pipeline_bind_point bind_point)
-+    if (list->device->use_vk_heaps)
-+        d3d12_command_list_update_heap_descriptors(list, bind_point);
-+    else
-+        d3d12_command_list_update_virtual_descriptors(list, bind_point);
- static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list)
- {
-     d3d12_command_list_end_current_render_pass(list);
-@@ -3303,7 +3310,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l
-     if (!d3d12_command_list_update_compute_pipeline(list))
-         return false;
--    list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
-+    d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE);
-     return true;
- }
-@@ -3320,7 +3327,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list
-     if (!d3d12_command_list_update_current_framebuffer(list))
-         return false;
--    list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS);
-+    d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS);
-     if (list->current_render_pass != VK_NULL_HANDLE)
-         return true;
-@@ -3351,6 +3358,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list
-         list->xfb_enabled = true;
-     }
-+    if (graphics->ds_desc.depthBoundsTestEnable && !list->has_depth_bounds)
-+    {
-+        list->has_depth_bounds = true;
-+        VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, 0.0f, 1.0f));
-+    }
-     return true;
- }
-@@ -4791,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi
-     VkDeviceSize offsets[ARRAY_SIZE(list->strides)];
-     const struct vkd3d_vk_device_procs *vk_procs;
-     VkBuffer buffers[ARRAY_SIZE(list->strides)];
-+    struct d3d12_device *device = list->device;
-+    unsigned int i, stride, max_view_count;
-     struct d3d12_resource *resource;
-     bool invalidate = false;
--    unsigned int i, stride;
-     TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views);
--    vk_procs = &list->device->vk_procs;
--    null_resources = &list->device->null_resources;
--    gpu_va_allocator = &list->device->gpu_va_allocator;
-+    vk_procs = &device->vk_procs;
-+    null_resources = &device->null_resources;
-+    gpu_va_allocator = &device->gpu_va_allocator;
-     if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides)))
-     {
-@@ -4807,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi
-         return;
-     }
-+    max_view_count = device->vk_info.device_limits.maxVertexInputBindings;
-+    if (start_slot < max_view_count)
-+        max_view_count -= start_slot;
-+    else
-+        max_view_count = 0;
-+    /* Although simply skipping unsupported binding slots isn't especially
-+     * likely to work well in the general case, applications sometimes
-+     * explicitly set all 32 vertex buffer bindings slots supported by
-+     * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is
-+     * an example of such an application. */
-+    if (view_count > max_view_count)
-+    {
-+        for (i = max_view_count; i < view_count; ++i)
-+        {
-+            if (views && views[i].BufferLocation)
-+                WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i);
-+        }
-+        view_count = max_view_count;
-+    }
-     for (i = 0; i < view_count; ++i)
-     {
-         if (views && views[i].BufferLocation)
-@@ -5939,7 +5974,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr
- static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface,
-         FLOAT min, FLOAT max)
- {
--    FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max);
-+    struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface);
-+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
-+    TRACE("iface %p, min %.8e, max %.8e.\n", iface, min, max);
-+    if (isnan(max))
-+        max = 0.0f;
-+    if (isnan(min))
-+        min = 0.0f;
-+    if (!list->device->vk_info.EXT_depth_range_unrestricted && (min < 0.0f || min > 1.0f || max < 0.0f || max > 1.0f))
-+    {
-+        WARN("VK_EXT_depth_range_unrestricted was not found, clamping depth bounds to 0.0 and 1.0.\n");
-+        max = vkd3d_clamp(max, 0.0f, 1.0f);
-+        min = vkd3d_clamp(min, 0.0f, 1.0f);
-+    }
-+    list->has_depth_bounds = true;
-+    VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, min, max));
- }
- static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface,
-@@ -6189,8 +6242,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
-     list->allocator = allocator;
--    list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors
--            : d3d12_command_list_update_descriptors;
-     list->descriptor_heap_count = 0;
-     if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list)))
-diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c
-index 01841c89692..e92373a36fa 100644
---- a/libs/vkd3d/libs/vkd3d/device.c
-+++ b/libs/vkd3d/libs/vkd3d/device.c
-@@ -102,6 +102,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
-+    VK_EXTENSION(EXT_DEPTH_RANGE_UNRESTRICTED, EXT_depth_range_unrestricted),
-     VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable),
-     VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock),
-@@ -135,7 +136,8 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic
-     };
--    if (device->vk_info.EXT_mutable_descriptor_type && index && index != VKD3D_SET_INDEX_UAV_COUNTER
-+    if (device->vk_info.EXT_mutable_descriptor_type
-+            && index != VKD3D_SET_INDEX_MUTABLE && index != VKD3D_SET_INDEX_UAV_COUNTER
-             && device->vk_descriptor_heap_layouts[index].applicable_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
-     {
-         device->vk_descriptor_heap_layouts[index].vk_set_layout = VK_NULL_HANDLE;
-@@ -143,7 +145,7 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic
-     }
-     binding.binding = 0;
--    binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && !index)
-+    binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && index == VKD3D_SET_INDEX_MUTABLE)
-             ? VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[index].type;
-     binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count;
-     binding.stageFlags = VK_SHADER_STAGE_ALL;
-@@ -199,14 +201,20 @@ static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device
- {
-     static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] =
-     {
--        /* UAV counters */
-     };
-     const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits;
-     enum vkd3d_vk_descriptor_set_index set;
-@@ -589,7 +597,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance,
-     }
-     if (!create_info->pfn_create_thread != !create_info->pfn_join_thread)
-     {
--        ERR("Invalid create/join thread function pointers.\n");
-+        WARN("Invalid create/join thread function pointers.\n");
-         return E_INVALIDARG;
-     }
-     if (create_info->wchar_size != 2 && create_info->wchar_size != 4)
-@@ -607,7 +615,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance,
-     if (FAILED(hr = vkd3d_init_vk_global_procs(instance, create_info->pfn_vkGetInstanceProcAddr)))
-     {
--        ERR("Failed to initialise Vulkan global procs, hr %s.\n", debugstr_hresult(hr));
-+        WARN("Failed to initialise Vulkan global procs, hr %s.\n", debugstr_hresult(hr));
-         return hr;
-     }
-@@ -689,7 +697,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance,
-     vkd3d_free(extensions);
-     if (vr < 0)
-     {
--        ERR("Failed to create Vulkan instance, vr %d.\n", vr);
-+        WARN("Failed to create Vulkan instance, vr %d.\n", vr);
-         if (instance->libvulkan)
-             vkd3d_dlclose(instance->libvulkan);
-         return hresult_from_vk_result(vr);
-@@ -697,7 +705,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance,
-     if (FAILED(hr = vkd3d_load_vk_instance_procs(&instance->vk_procs, vk_global_procs, vk_instance)))
-     {
--        ERR("Failed to load instance procs, hr %s.\n", debugstr_hresult(hr));
-+        WARN("Failed to load instance procs, hr %s.\n", debugstr_hresult(hr));
-         if (instance->vk_procs.vkDestroyInstance)
-             instance->vk_procs.vkDestroyInstance(vk_instance, NULL);
-         if (instance->libvulkan)
-@@ -1572,6 +1580,111 @@ static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device,
-     return S_OK;
- }
-+static void vkd3d_override_caps(struct d3d12_device *device)
-+    const char *caps_override, *p;
-+    static const struct override_value
-+    {
-+        const char *str;
-+        uint32_t value;
-+    }
-+    feature_level_override_values[] =
-+    {
-+        {"11.0", D3D_FEATURE_LEVEL_11_0},
-+        {"11.1", D3D_FEATURE_LEVEL_11_1},
-+        {"12.0", D3D_FEATURE_LEVEL_12_0},
-+        {"12.1", D3D_FEATURE_LEVEL_12_1},
-+        {"12.2", D3D_FEATURE_LEVEL_12_2},
-+    },
-+    resource_binding_tier_override_values[] =
-+    {
-+        {"1", D3D12_RESOURCE_BINDING_TIER_1},
-+        {"2", D3D12_RESOURCE_BINDING_TIER_2},
-+        {"3", D3D12_RESOURCE_BINDING_TIER_3},
-+    };
-+    static const struct override_field
-+    {
-+        const char *name;
-+        size_t offset;
-+        const struct override_value *values;
-+        size_t value_count;
-+    }
-+    override_fields[] =
-+    {
-+        {
-+            "feature_level",
-+            offsetof(struct d3d12_device, vk_info.max_feature_level),
-+            feature_level_override_values,
-+            ARRAY_SIZE(feature_level_override_values)
-+        },
-+        {
-+            "resource_binding_tier",
-+            offsetof(struct d3d12_device, feature_options.ResourceBindingTier),
-+            resource_binding_tier_override_values,
-+            ARRAY_SIZE(resource_binding_tier_override_values)
-+        },
-+    };
-+    if (!(caps_override = getenv("VKD3D_CAPS_OVERRIDE")))
-+        return;
-+    p = caps_override;
-+    for (;;)
-+    {
-+        size_t i;
-+        for (i = 0; i < ARRAY_SIZE(override_fields); ++i)
-+        {
-+            const struct override_field *field = &override_fields[i];
-+            size_t len = strlen(field->name);
-+            if (strncmp(p, field->name, len) == 0 && p[len] == '=')
-+            {
-+                size_t j;
-+                p += len + 1;
-+                for (j = 0; j < field->value_count; ++j)
-+                {
-+                    const struct override_value *value = &field->values[j];
-+                    size_t value_len =  strlen(value->str);
-+                    if (strncmp(p, value->str, value_len) == 0
-+                            && (p[value_len] == '\0' || p[value_len] == ','))
-+                    {
-+                        memcpy(&((uint8_t *)device)[field->offset], (uint8_t *)&value->value, sizeof(value->value));
-+                        p += value_len;
-+                        if (p[0] == '\0')
-+                        {
-+                            TRACE("Overriding caps with: %s\n", caps_override);
-+                            return;
-+                        }
-+                        p += 1;
-+                        break;
-+                    }
-+                }
-+                if (j == field->value_count)
-+                {
-+                    WARN("Cannot parse the override caps string: %s\n", caps_override);
-+                    return;
-+                }
-+                break;
-+            }
-+        }
-+        if (i == ARRAY_SIZE(override_fields))
-+        {
-+            WARN("Cannot parse the override caps string: %s\n", caps_override);
-+            return;
-+        }
-+    }
- static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
-         const struct vkd3d_device_create_info *create_info,
-         struct vkd3d_physical_device_info *physical_device_info,
-@@ -1583,7 +1696,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
-     VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing;
-     VkPhysicalDevice physical_device = device->vk_physical_device;
-     struct vkd3d_vulkan_info *vulkan_info = &device->vk_info;
--    VkExtensionProperties *vk_extensions;
-+    VkExtensionProperties *vk_extensions = NULL;
-     VkPhysicalDeviceFeatures *features;
-     uint32_t vk_extension_count;
-     HRESULT hr;
-@@ -1741,6 +1854,9 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
-             vulkan_info->EXT_shader_viewport_index_layer;
-     vkd3d_init_feature_level(vulkan_info, features, &device->feature_options);
-+    vkd3d_override_caps(device);
-     if (vulkan_info->max_feature_level < create_info->minimum_feature_level)
-     {
-         WARN("Feature level %#x is not supported.\n", create_info->minimum_feature_level);
-@@ -1809,6 +1925,28 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
-             && descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind
-             && descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind;
-+    if (device->use_vk_heaps && device->vk_info.KHR_push_descriptor)
-+    {
-+        /* VKD3D_SET_INDEX_COUNT for the Vulkan heaps, one for the push
-+         * descriptors set and one for the static samplers set. */
-+        unsigned int descriptor_set_count = VKD3D_SET_INDEX_COUNT + 2;
-+        /* A mutable descriptor set can replace all those that should otherwise
-+         * back the SRV-UAV-CBV descriptor heap. */
-+        if (device->vk_info.EXT_mutable_descriptor_type)
-+            descriptor_set_count -= VKD3D_SET_INDEX_COUNT - (VKD3D_SET_INDEX_MUTABLE + 1);
-+        /* For many Vulkan implementations maxBoundDescriptorSets == 8; also,
-+         * if mutable descriptors are not available the descriptor set count
-+         * will be 9; so saving a descriptor set is going to be often
-+         * significant. */
-+        if (descriptor_set_count > device->vk_info.device_limits.maxBoundDescriptorSets)
-+        {
-+            WARN("Disabling VK_KHR_push_descriptor to save a descriptor set.\n");
-+            device->vk_info.KHR_push_descriptor = VK_FALSE;
-+        }
-+    }
-     if (device->use_vk_heaps)
-         vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits,
-                 &physical_device_info->descriptor_indexing_properties);
-@@ -1816,6 +1954,13 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
-         vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits,
-                 &physical_device_info->properties2.properties.limits);
-+    TRACE("Device %p: using %s descriptor heaps, with%s descriptor indexing, "
-+            "with%s push descriptors, with%s mutable descriptors\n",
-+            device, device->use_vk_heaps ? "Vulkan" : "virtual",
-+            device->vk_info.EXT_descriptor_indexing ? "" : "out",
-+            device->vk_info.KHR_push_descriptor ? "" : "out",
-+            device->vk_info.EXT_mutable_descriptor_type ? "" : "out");
-     vkd3d_chain_physical_device_info_structures(physical_device_info, device);
-     return S_OK;
-diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c
-index 6d6820d3752..1f7d90eb95f 100644
---- a/libs/vkd3d/libs/vkd3d/resource.c
-+++ b/libs/vkd3d/libs/vkd3d/resource.c
-@@ -2498,7 +2498,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea
-     enum vkd3d_vk_descriptor_set_index set, end;
-     unsigned int i = writes->count;
--    end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_UNIFORM_BUFFER
-+    end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_MUTABLE
-     /* Binding a shader with the wrong null descriptor type works in Windows.
-      * To support that here we must write one to all applicable Vulkan sets. */
-@@ -4250,7 +4250,8 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descrip
-         if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type
-                 && device->vk_descriptor_heap_layouts[set].vk_set_layout)
-         {
--            pool_sizes[pool_desc.poolSizeCount].type = (device->vk_info.EXT_mutable_descriptor_type && !set)
-+            pool_sizes[pool_desc.poolSizeCount].type =
-+                    (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE)
-                     ? VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[set].type;
-             pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors;
-         }
-@@ -4280,11 +4281,12 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript
-     if (!device->vk_descriptor_heap_layouts[set].vk_set_layout)
-     {
--        /* Set 0 uses mutable descriptors, and this set is unused. */
--        if (!descriptor_heap->vk_descriptor_sets[0].vk_set
--                && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0)))
-+        /* Mutable descriptors are in use, and this set is unused. */
-+        if (!descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set
-+                && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap,
-+                device, VKD3D_SET_INDEX_MUTABLE)))
-             return hr;
--        descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[0].vk_set;
-+        descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set;
-         descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type;
-         return S_OK;
-     }
-diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
-index 682d488faa8..2b0f81d3812 100644
---- a/libs/vkd3d/libs/vkd3d/state.c
-+++ b/libs/vkd3d/libs/vkd3d/state.c
-@@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi
-     }
- }
-+static VkShaderStageFlags stage_flags_from_vkd3d_shader_visibility(enum vkd3d_shader_visibility visibility)
-+    switch (visibility)
-+    {
-+            return VK_SHADER_STAGE_ALL;
-+            return VK_SHADER_STAGE_VERTEX_BIT;
-+            return VK_SHADER_STAGE_GEOMETRY_BIT;
-+            return VK_SHADER_STAGE_FRAGMENT_BIT;
-+            return VK_SHADER_STAGE_COMPUTE_BIT;
-+        default:
-+            FIXME("Unhandled visibility %#x.\n", visibility);
-+            return VKD3D_SHADER_VISIBILITY_ALL;
-+    }
- static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility)
- {
-     switch (visibility)
-@@ -260,23 +284,6 @@ static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d
-     }
- }
--static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type)
--    switch (type)
--    {
--        /* SRV and UAV root parameters are buffer views. */
--        case D3D12_ROOT_PARAMETER_TYPE_SRV:
--        case D3D12_ROOT_PARAMETER_TYPE_UAV:
--        case D3D12_ROOT_PARAMETER_TYPE_CBV:
--        default:
--            FIXME("Unhandled descriptor root parameter type %#x.\n", type);
--    }
- static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type(
-         D3D12_DESCRIPTOR_RANGE_TYPE type)
- {
-@@ -313,20 +320,6 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p
-     }
- }
--static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc,
--        enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility,
--        bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count)
--    binding_desc->binding = vk_binding;
--    binding_desc->descriptorType
--            = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer);
--    binding_desc->descriptorCount = descriptor_count;
--    binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility);
--    binding_desc->pImmutableSamplers = NULL;
--    return true;
- struct d3d12_root_signature_info
- {
-     size_t binding_count;
-@@ -719,18 +712,66 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat
-     return S_OK;
- }
-+struct vk_binding_array
-+    VkDescriptorSetLayoutBinding *bindings;
-+    size_t capacity, count;
-+    unsigned int table_index;
-+    unsigned int unbounded_offset;
-+    VkDescriptorSetLayoutCreateFlags flags;
-+static void vk_binding_array_cleanup(struct vk_binding_array *array)
-+    vkd3d_free(array->bindings);
-+    array->bindings = NULL;
-+static bool vk_binding_array_add_binding(struct vk_binding_array *array,
-+        VkDescriptorType descriptor_type, unsigned int descriptor_count,
-+        VkShaderStageFlags stage_flags, const VkSampler *immutable_sampler, unsigned int *binding_idx)
-+    unsigned int binding_count = array->count;
-+    VkDescriptorSetLayoutBinding *binding;
-+    if (!vkd3d_array_reserve((void **)&array->bindings, &array->capacity,
-+            array->count + 1, sizeof(*array->bindings)))
-+    {
-+        ERR("Failed to reallocate the Vulkan binding array.\n");
-+        return false;
-+    }
-+    *binding_idx = binding_count;
-+    binding = &array->bindings[binding_count];
-+    binding->binding = binding_count;
-+    binding->descriptorType = descriptor_type;
-+    binding->descriptorCount = descriptor_count;
-+    binding->stageFlags = stage_flags;
-+    binding->pImmutableSamplers = immutable_sampler;
-+    ++array->count;
-+    return true;
- struct vkd3d_descriptor_set_context
- {
--    VkDescriptorSetLayoutBinding *current_binding;
--    VkDescriptorSetLayoutBinding *first_binding;
-+    struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS];
-     unsigned int table_index;
-     unsigned int unbounded_offset;
-     unsigned int descriptor_index;
-     unsigned int uav_counter_index;
-     unsigned int push_constant_index;
--    uint32_t descriptor_binding;
- };
-+static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context)
-+    size_t i;
-+    for (i = 0; i < ARRAY_SIZE(context->vk_bindings); ++i)
-+        vk_binding_array_cleanup(&context->vk_bindings[i]);
- static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count)
- {
-     uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, device->vk_info.device_limits.maxBoundDescriptorSets);
-@@ -738,63 +779,63 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns
-     if (set_count > max_count)
-     {
-         /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */
--        ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count);
-+        WARN("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count);
-         return false;
-     }
-     return true;
- }
--static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device,
--        VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded,
--        const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout);
--static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature,
--    struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags)
-+static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array(
-+        struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context)
- {
--    struct d3d12_descriptor_set_layout *layout;
--    unsigned int index;
--    HRESULT hr;
--    if (!context->descriptor_binding)
--        return S_OK;
-+    if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings))
-+        return NULL;
--    index = root_signature->vk_set_count;
--    layout = &root_signature->descriptor_set_layouts[index];
-+    return &context->vk_bindings[root_signature->vk_set_count];
--    if (!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1))
--        return E_INVALIDARG;
-+static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature,
-+        VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context)
-+    struct vk_binding_array *array;
--    if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding,
--            context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout)))
--        return hr;
--    layout->table_index = context->table_index;
--    layout->unbounded_offset = context->unbounded_offset;
--    ++root_signature->vk_set_count;
-+    if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count)
-+        return;
--    context->current_binding = context->first_binding;
--    context->descriptor_binding = 0;
-+    array->table_index = context->table_index;
-+    array->unbounded_offset = context->unbounded_offset;
-+    array->flags = flags;
--    return S_OK;
-+    ++root_signature->vk_set_count;
- }
- static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature,
--        enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx,
--        bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility,
--        unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context)
-+        enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space,
-+        unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility,
-+        unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context,
-+        const VkSampler *immutable_sampler, unsigned int *binding_idx)
- {
-     struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets
-             ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL;
--    struct vkd3d_shader_resource_binding *mapping
--            = &root_signature->descriptor_mapping[context->descriptor_index++];
-+    struct vkd3d_shader_resource_binding *mapping;
-+    struct vk_binding_array *array;
-+    unsigned int idx;
-+    if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context))
-+            || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count],
-+                    vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count,
-+                    stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx)))
-+        return E_OUTOFMEMORY;
-+    mapping = &root_signature->descriptor_mapping[context->descriptor_index++];
-     mapping->type = descriptor_type;
-     mapping->register_space = register_space;
-     mapping->register_index = register_idx;
-     mapping->shader_visibility = shader_visibility;
-     mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE;
-     mapping->binding.set = root_signature->vk_set_count;
--    mapping->binding.binding = context->descriptor_binding++;
-+    mapping->binding.binding = idx;
-     mapping->binding.count = descriptor_count;
-     if (offset)
-     {
-@@ -803,37 +844,11 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur
-     }
-     if (context->unbounded_offset != UINT_MAX)
--        return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0);
--    return S_OK;
--static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature,
--        enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx,
--        unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors,
--        enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context,
--        uint32_t *first_binding)
--    unsigned int i;
--    HRESULT hr;
--    is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV;
--    duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV
--            || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
--            && duplicate_descriptors;
-+        d3d12_root_signature_append_vk_binding_array(root_signature, 0, context);
--    *first_binding = context->descriptor_binding;
--    for (i = 0; i < binding_count; ++i)
--    {
--        if (duplicate_descriptors
--                && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type,
--                register_space, base_register_idx + i, true, shader_visibility, 1, context)))
--            return hr;
-+    if (binding_idx)
-+        *binding_idx = idx;
--        if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space,
--                base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context)))
--            return hr;
--    }
-     return S_OK;
- }
-@@ -895,38 +910,41 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro
- }
--static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature,
-+static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature,
-         const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility,
-+        unsigned int vk_binding_array_count, unsigned int bindings_per_range,
-         struct vkd3d_descriptor_set_context *context)
- {
-     enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility);
--    bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV;
-+    bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER;
-     enum vkd3d_shader_descriptor_type descriptor_type = range->type;
-+    unsigned int i, register_space = range->register_space;
-     HRESULT hr;
-     if (range->descriptor_count == UINT_MAX)
-         context->unbounded_offset = range->offset;
--    if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
-+    for (i = 0; i < bindings_per_range; ++i)
-     {
--        if (!vk_binding_from_d3d12_descriptor_range(context->current_binding,
--                descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count))
--            return E_NOTIMPL;
--        ++context->current_binding;
--        if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space,
--                range->base_register_idx, true, shader_visibility, range->vk_binding_count, context)))
-+        if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type,
-+                register_space, range->base_register_idx + i, is_buffer, shader_visibility,
-+                vk_binding_array_count, context, NULL, NULL)))
-             return hr;
-     }
--    if (!vk_binding_from_d3d12_descriptor_range(context->current_binding,
--            descriptor_type, visibility, is_buffer, context->descriptor_binding, range->vk_binding_count))
--        return E_NOTIMPL;
--    ++context->current_binding;
-+    if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
-+    {
-+        context->unbounded_offset = UINT_MAX;
-+        return S_OK;
-+    }
--    if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space,
--            range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context)))
--        return hr;
-+    for (i = 0; i < bindings_per_range; ++i)
-+    {
-+        if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type,
-+                register_space, range->base_register_idx + i, false, shader_visibility,
-+                vk_binding_array_count, context, NULL, NULL)))
-+            return hr;
-+    }
-     context->unbounded_offset = UINT_MAX;
-@@ -998,7 +1016,7 @@ static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d
-         }
-         else
-         {
--            binding->set = 0;
-+            binding->set = VKD3D_SET_INDEX_MUTABLE;
-             descriptor_set_size = descriptor_limits->sampled_image_max_descriptors;
-         }
-     }
-@@ -1107,18 +1125,19 @@ static int compare_descriptor_range(const void *a, const void *b)
-     if ((ret = vkd3d_u32_compare(range_a->offset, range_b->offset)))
-         return ret;
--    return (range_a->descriptor_count == UINT_MAX) - (range_b->descriptor_count == UINT_MAX);
-+    /* Place bounded ranges after unbounded ones of equal offset,
-+     * so the bounded range can be mapped to the unbounded one. */
-+    return (range_b->descriptor_count == UINT_MAX) - (range_a->descriptor_count == UINT_MAX);
- }
- static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature,
-         const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info,
-         struct vkd3d_descriptor_set_context *context)
- {
-+    unsigned int i, j, range_count, bindings_per_range, vk_binding_array_count;
-     const struct d3d12_device *device = root_signature->device;
-     bool use_vk_heaps = root_signature->device->use_vk_heaps;
-     struct d3d12_root_descriptor_table *table;
--    unsigned int i, j, k, range_count;
--    uint32_t vk_binding;
-     HRESULT hr;
-     root_signature->descriptor_table_mask = 0;
-@@ -1175,7 +1194,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
-         for (j = 0; j < range_count; ++j)
-         {
-             struct d3d12_root_descriptor_table_range *range;
--            VkDescriptorSetLayoutBinding *cur_binding;
-             range = &table->ranges[j];
-@@ -1221,53 +1239,23 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
-                     base_range = range;
-                 }
--                range->binding = context->descriptor_binding;
-                 range->vk_binding_count = vk_binding_count_from_descriptor_range(range,
-                         info, &device->vk_info.descriptor_limits);
--                if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature,
--                        range, p->ShaderVisibility, context)))
--                    return hr;
--                continue;
-+                vk_binding_array_count = range->vk_binding_count;
-+                bindings_per_range = 1;
-             }
--            cur_binding = context->current_binding;
--            if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature,
--                    range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true,
--                    shader_visibility, context, &vk_binding)))
--                return hr;
--            /* Unroll descriptor range. */
--            for (k = 0; k < range->descriptor_count; ++k)
-+            else
-             {
--                uint32_t vk_current_binding = vk_binding + k;
--                if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV
--                        || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV)
--                {
--                    vk_current_binding = vk_binding + 2 * k;
--                    /* Assign binding for image view. */
--                    if (!vk_binding_from_d3d12_descriptor_range(cur_binding,
--                            range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1))
--                        return E_NOTIMPL;
--                    ++cur_binding;
--                }
--                if (!vk_binding_from_d3d12_descriptor_range(cur_binding,
--                        range->type, p->ShaderVisibility, true, vk_current_binding, 1))
--                    return E_NOTIMPL;
--                ++cur_binding;
-+                range->vk_binding_count = range->descriptor_count;
-+                vk_binding_array_count = 1;
-+                bindings_per_range = range->descriptor_count;
-             }
--            table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count;
--            table->ranges[j].binding = vk_binding;
-+            range->binding = context->vk_bindings[root_signature->vk_set_count].count;
--            context->current_binding = cur_binding;
-+            if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range,
-+                    p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context)))
-+                return hr;
-         }
-         ++context->push_constant_index;
-     }
-@@ -1278,8 +1266,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
- static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature,
-         const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context)
- {
--    VkDescriptorSetLayoutBinding *cur_binding = context->current_binding;
--    unsigned int i;
-+    unsigned int binding, i;
-     HRESULT hr;
-     root_signature->push_descriptor_mask = 0;
-@@ -1294,23 +1281,16 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign
-         root_signature->push_descriptor_mask |= 1u << i;
--        if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature,
-+        if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature,
-                 vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType),
--                p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false,
--                vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding)))
-+                p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true,
-+                vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding)))
-             return hr;
--        cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType);
--        cur_binding->descriptorCount = 1;
--        cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility);
--        cur_binding->pImmutableSamplers = NULL;
-         root_signature->parameters[i].parameter_type = p->ParameterType;
--        root_signature->parameters[i].u.descriptor.binding = cur_binding->binding;
--        ++cur_binding;
-+        root_signature->parameters[i].u.descriptor.binding = binding;
-     }
--    context->current_binding = cur_binding;
-     return S_OK;
- }
-@@ -1318,7 +1298,6 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa
-         struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc,
-         struct vkd3d_descriptor_set_context *context)
- {
--    VkDescriptorSetLayoutBinding *cur_binding = context->current_binding;
-     unsigned int i;
-     HRESULT hr;
-@@ -1330,21 +1309,15 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa
-         if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i])))
-             return hr;
--        if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature,
--                VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false,
--                vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding)))
-+        if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature,
-+                VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false,
-+                vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context,
-+                &root_signature->static_samplers[i], NULL)))
-             return hr;
--        cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
--        cur_binding->descriptorCount = 1;
--        cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility);
--        cur_binding->pImmutableSamplers = &root_signature->static_samplers[i];
--        ++cur_binding;
-     }
--    context->current_binding = cur_binding;
-     if (device->use_vk_heaps)
--        return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0);
-+        d3d12_root_signature_append_vk_binding_array(root_signature, 0, context);
-     return S_OK;
- }
-@@ -1477,26 +1450,57 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device,
-     return S_OK;
- }
-+static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature,
-+        struct vkd3d_descriptor_set_context *context)
-+    unsigned int i;
-+    HRESULT hr;
-+    d3d12_root_signature_append_vk_binding_array(root_signature, 0, context);
-+    if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count))
-+        return E_INVALIDARG;
-+    for (i = 0; i < root_signature->vk_set_count; ++i)
-+    {
-+        struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i];
-+        struct vk_binding_array *array = &context->vk_bindings[i];
-+        VKD3D_ASSERT(array->count);
-+        if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count,
-+                array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout)))
-+            return hr;
-+        layout->unbounded_offset = array->unbounded_offset;
-+        layout->table_index = array->table_index;
-+    }
-+    return S_OK;
- static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature,
-         VkDescriptorSetLayout *vk_set_layouts)
- {
-     const struct d3d12_device *device = root_signature->device;
-     enum vkd3d_vk_descriptor_set_index set;
-+    VkDescriptorSetLayout vk_set_layout;
-     unsigned int i;
-     for (i = 0; i < root_signature->vk_set_count; ++i)
-         vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout;
--    if (device->use_vk_heaps)
-+    if (!device->use_vk_heaps)
-+        return i;
-+    for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
-     {
--        VkDescriptorSetLayout mutable_layout = device->vk_descriptor_heap_layouts[0].vk_set_layout;
-+        vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout;
--        for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set)
--        {
--            VkDescriptorSetLayout vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout;
--            /* All layouts must be valid, so if null, just set it to the mutable one. */
--            vk_set_layouts[i++] = vk_set_layout ? vk_set_layout : mutable_layout;
--        }
-+        VKD3D_ASSERT(vk_set_layout);
-+        vk_set_layouts[i++] = vk_set_layout;
-+        if (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE)
-+            break;
-     }
-     return i;
-@@ -1508,7 +1512,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
-     VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS];
-     const struct vkd3d_vulkan_info *vk_info = &device->vk_info;
-     struct vkd3d_descriptor_set_context context;
--    VkDescriptorSetLayoutBinding *binding_desc;
-     struct d3d12_root_signature_info info;
-     bool use_vk_heaps;
-     unsigned int i;
-@@ -1516,7 +1519,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
-     memset(&context, 0, sizeof(context));
-     context.unbounded_offset = UINT_MAX;
--    binding_desc = NULL;
-     root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl;
-     root_signature->refcount = 1;
-@@ -1578,20 +1580,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
-             sizeof(*root_signature->static_samplers))))
-         goto fail;
--    if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc))))
--        goto fail;
--    context.first_binding = binding_desc;
--    context.current_binding = binding_desc;
-     if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context)))
-         goto fail;
-     /* We use KHR_push_descriptor for root descriptor parameters. */
-     if (vk_info->KHR_push_descriptor)
-     {
--        if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature,
--            goto fail;
-+        d3d12_root_signature_append_vk_binding_array(root_signature,
-     }
-     root_signature->main_set = root_signature->vk_set_count;
-@@ -1607,11 +1603,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
-     if (use_vk_heaps)
-         d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context);
--    if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0)))
-+    if (FAILED(hr = d3d12_root_signature_create_descriptor_set_layouts(root_signature, &context)))
-         goto fail;
--    vkd3d_free(binding_desc);
--    binding_desc = NULL;
-+    descriptor_set_context_cleanup(&context);
-     i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts);
-     if (FAILED(hr = vkd3d_create_pipeline_layout(device, i,
-@@ -1627,7 +1622,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
-     return S_OK;
- fail:
--    vkd3d_free(binding_desc);
-+    descriptor_set_context_cleanup(&context);
-     d3d12_root_signature_cleanup(root_signature, device);
-     return hr;
- }
-@@ -2286,7 +2281,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
-     const struct vkd3d_shader_compile_option options[] =
-     {
-         {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)},
-         {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)},
-@@ -2341,7 +2336,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER
-     const struct vkd3d_shader_compile_option options[] =
-     {
-         {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)},
-     };
-@@ -3867,6 +3862,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta
-     };
-     static const VkPipelineDynamicStateCreateInfo dynamic_desc =
-     {
-@@ -4019,7 +4015,7 @@ static int compile_hlsl_cs(const struct vkd3d_shader_code *hlsl, struct vkd3d_sh
-     static const struct vkd3d_shader_compile_option options[] =
-     {
-     };
-diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c
-index 831dc07af56..839bb173854 100644
---- a/libs/vkd3d/libs/vkd3d/utils.c
-+++ b/libs/vkd3d/libs/vkd3d/utils.c
-@@ -703,7 +703,7 @@ const char *debug_vk_extent_3d(VkExtent3D extent)
- const char *debug_vk_queue_flags(VkQueueFlags flags)
- {
--    char buffer[159];
-+    char buffer[191];
-     buffer[0] = '\0';
- #define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; }
-@@ -715,6 +715,7 @@ const char *debug_vk_queue_flags(VkQueueFlags flags)
- #undef FLAG_TO_STR
- #define FLAG_TO_STR(f, n) if (flags & f) { strcat(buffer, " | "#n); flags &= ~f; }
- #undef FLAG_TO_STR
-     if (flags)
-         FIXME("Unrecognized flag(s) %#x.\n", flags);
-diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c
-index 9eccec111c7..5215cf8ef86 100644
---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c
-+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c
-@@ -415,6 +415,7 @@ HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZ
-     if (FAILED(hr = d3d12_versioned_root_signature_deserializer_init(object, &dxbc)))
-     {
-         vkd3d_free(object);
-+        *deserializer = NULL;
-         return hr;
-     }
-diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
-index ba4e2e8488d..97a99782d6a 100644
---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h
-+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
-@@ -131,6 +131,7 @@ struct vkd3d_vulkan_info
-     bool EXT_calibrated_timestamps;
-     bool EXT_conditional_rendering;
-     bool EXT_debug_marker;
-+    bool EXT_depth_range_unrestricted;
-     bool EXT_depth_clip_enable;
-     bool EXT_descriptor_indexing;
-     bool EXT_fragment_shader_interlock;
-@@ -771,14 +772,21 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev
- enum vkd3d_vk_descriptor_set_index
- {
-+    /* These are used when mutable descriptors are not available to back
-+     * SRV-UAV-CBV descriptor heaps. They must stay at the end of this
-+     * enumeration, so that they can be ignored when mutable descriptors are
-+     * used. */
- };
- extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[];
-@@ -1254,7 +1262,7 @@ struct d3d12_command_list
-     VkFormat dsv_format;
-     bool xfb_enabled;
-+    bool has_depth_bounds;
-     bool is_predicated;
-     VkFramebuffer current_framebuffer;
-@@ -1271,7 +1279,6 @@ struct d3d12_command_list
-     VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT];
-     VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
--    void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point);
-     struct d3d12_descriptor_heap *descriptor_heaps[64];
-     unsigned int descriptor_heap_count;
diff --git a/staging/upstream-commit b/staging/upstream-commit
index 565fc3e0..a3833332 100644
--- a/staging/upstream-commit
+++ b/staging/upstream-commit
@@ -1 +1 @@