From 6d0c679f28b976983d6ea925d9adc5dff37bcb56 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Tue, 24 Jun 2025 13:36:03 +1000
Subject: [PATCH] Updated vkd3d to d8edf20c2b4224384d8e206c620bfbd61c56219d.

---
 libs/vkd3d/libs/vkd3d-shader/d3dbc.c          |    5 +-
 libs/vkd3d/libs/vkd3d-shader/dxil.c           |    2 +
 libs/vkd3d/libs/vkd3d-shader/fx.c             |  574 +++++-----
 libs/vkd3d/libs/vkd3d-shader/hlsl.c           |   16 +-
 libs/vkd3d/libs/vkd3d-shader/hlsl.h           |    7 +-
 libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c   |  129 ++-
 libs/vkd3d/libs/vkd3d-shader/ir.c             | 1009 ++++++++++++++++-
 libs/vkd3d/libs/vkd3d-shader/msl.c            |  259 ++++-
 libs/vkd3d/libs/vkd3d-shader/spirv.c          |   10 +-
 libs/vkd3d/libs/vkd3d-shader/tpf.c            |    3 +
 .../libs/vkd3d-shader/vkd3d_shader_private.h  |   10 +
 11 files changed, 1680 insertions(+), 344 deletions(-)

diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index fbc0235cdd0..f19a6283197 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -2137,6 +2137,9 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
     struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer;
     int result;
 
+    if ((result = vsir_allocate_temp_registers(program, message_context)))
+        return result;
+
     d3dbc.program = program;
     d3dbc.message_context = message_context;
     switch (version->type)
@@ -2156,7 +2159,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags,
     }
 
     put_u32(buffer, sm1_version(version->type, version->major, version->minor));
-    d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab);
+    d3dbc_write_comment(&d3dbc, TAG_CTAB, ctab);
     d3dbc_write_semantic_dcls(&d3dbc);
     d3dbc_write_program_instructions(&d3dbc);
 
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index 9ebcb6870e9..71fa81ec163 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -5180,6 +5180,8 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr
     src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type);
     if (data_type_is_64_bit(src_param->reg.data_type))
         src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle);
+    else
+        register_convert_to_minimum_precision(&src_param->reg);
 
     instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6);
 }
diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
index 95a172fd827..0ab1a676400 100644
--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
@@ -251,6 +251,8 @@ struct fx_write_context_ops
     void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx);
     void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx);
     void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx);
+    void (*write_state_assignment)(const struct hlsl_ir_var *var,
+            struct hlsl_state_block_entry *entry, struct fx_write_context *fx);
     bool are_child_effects_supported;
 };
 
@@ -313,6 +315,15 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx)
     fx->ops->write_pass(var, fx);
 }
 
+static void write_state_assignment(const struct hlsl_ir_var *var,
+        struct hlsl_state_block_entry *entry, struct fx_write_context *fx)
+{
+    fx->ops->write_state_assignment(var, entry, fx); /* Forward to the fx->ops hook; it is not NULL-checked here. */
+}
+
+static uint32_t write_state_block(struct hlsl_ir_var *var,
+        unsigned int block_index, struct fx_write_context *fx); /* Prototype; called by write_fx_4_pass(). */
+
 static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx)
 {
     struct hlsl_ctx *ctx = fx->ctx;
@@ -348,8 +359,6 @@ static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_con
 static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx);
 static const char * get_fx_4_type_name(const struct hlsl_type *type);
 static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx);
-static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index,
-        uint32_t count_offset, struct fx_write_context *fx);
 
 static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx)
 {
@@ -502,17 +511,22 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f
     return string_entry->offset;
 }
 
+static void fx_4_decompose_state_blocks(struct hlsl_ir_var *var, struct fx_write_context *fx);
+
 static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx)
 {
     struct vkd3d_bytecode_buffer *buffer = &fx->structured;
-    uint32_t name_offset, count_offset;
+    uint32_t name_offset, count_offset, count; /* count receives the write_state_block() result. */
 
     name_offset = write_string(var->name, fx);
     put_u32(buffer, name_offset);
     count_offset = put_u32(buffer, 0);
 
+    fx_4_decompose_state_blocks(var, fx); /* Runs before the annotations and the state block are written. */
+
     write_fx_4_annotations(var->annotations, fx);
-    write_fx_4_state_block(var, 0, count_offset, fx);
+    count = write_state_block(var, 0, fx); /* Block index 0; the result is stored at count_offset below. */
+    set_u32(buffer, count_offset, count); /* Presumably overwrites the 0 placeholder emitted by put_u32() above. */
 }
 
 static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offset, struct fx_write_context *fx)
@@ -775,9 +789,10 @@ static const struct rhs_named_value fx_2_filter_values[] =
     { NULL }
 };
 
-struct fx_2_state
+struct fx_state
 {
     const char *name;
+    enum hlsl_type_class container;
     enum hlsl_type_class class;
     enum state_property_component_type type;
     unsigned int dimx;
@@ -786,215 +801,215 @@ struct fx_2_state
     const struct rhs_named_value *values;
 };
 
-static const struct fx_2_state fx_2_pass_states[] =
-{
-    { "ZEnable",          HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 0, fx_2_zenable_values },
-    { "FillMode",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 1, fx_2_fillmode_values },
-    { "ShadeMode",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 2, fx_2_shademode_values },
-    { "ZWriteEnable",     HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 3 },
-    { "AlphaTestEnable",  HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 4 },
-    { "LastPixel",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 5 },
-    { "SrcBlend",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 6, fx_2_blendmode_values },
-    { "DestBlend",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 7, fx_2_blendmode_values },
-    { "CullMode",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 8, fx_2_cullmode_values },
-    { "ZFunc",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 9, fx_2_cmpfunc_values },
-    { "AlphaRef",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 10 },
-    { "AlphaFunc",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 11, fx_2_cmpfunc_values },
-    { "DitherEnable",     HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 12 },
-    { "AlphaBlendEnable", HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 13 },
-    { "FogEnable",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 14 },
-    { "SpecularEnable",   HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 15 },
-    { "FogColor",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 16 },
-    { "FogTableMode",     HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 17, fx_2_fogmode_values },
-    { "FogStart",         HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 18 },
-    { "FogEnd",           HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 19 },
-    { "FogDensity",       HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 20 },
-    { "RangeFogEnable",   HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 21 },
-    { "StencilEnable",    HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 22 },
-    { "StencilFail",      HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 23, fx_2_stencilcaps_values },
-    { "StencilZFail",     HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 24, fx_2_stencilcaps_values },
-    { "StencilPass",      HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 25, fx_2_stencilcaps_values },
-    { "StencilFunc",      HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 26, fx_2_cmpfunc_values },
-    { "StencilRef",       HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 27 },
-    { "StencilMask",      HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 28 },
-    { "StencilWriteMask", HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 29 },
-    { "TextureFactor",    HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 30 },
-    { "Wrap0",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 31, fx_2_wrap_values },
-    { "Wrap1",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 32, fx_2_wrap_values },
-    { "Wrap2",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 33, fx_2_wrap_values },
-    { "Wrap3",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 34, fx_2_wrap_values },
-    { "Wrap4",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 35, fx_2_wrap_values },
-    { "Wrap5",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 36, fx_2_wrap_values },
-    { "Wrap6",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 37, fx_2_wrap_values },
-    { "Wrap7",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 38, fx_2_wrap_values },
-    { "Wrap8",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 39, fx_2_wrap_values },
-    { "Wrap9",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 40, fx_2_wrap_values },
-    { "Wrap10",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 41, fx_2_wrap_values },
-    { "Wrap11",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 42, fx_2_wrap_values },
-    { "Wrap12",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 43, fx_2_wrap_values },
-    { "Wrap13",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 44, fx_2_wrap_values },
-    { "Wrap14",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 45, fx_2_wrap_values },
-    { "Wrap15",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 46, fx_2_wrap_values },
-    { "Clipping",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 47 },
-    { "Lighting",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 48 },
-    { "Ambient",          HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 49 },
-    { "FogVertexMode",    HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 50, fx_2_fogmode_values },
-    { "ColorVertex",      HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 51 },
-    { "LocalViewer",      HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 52 },
-    { "NormalizeNormals", HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 53 },
-
-    { "DiffuseMaterialSource",  HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 54, fx_2_materialcolorsource_values },
-    { "SpecularMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 55, fx_2_materialcolorsource_values },
-    { "AmbientMaterialSource",  HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 56, fx_2_materialcolorsource_values },
-    { "EmissiveMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 57, fx_2_materialcolorsource_values },
-
-    { "VertexBlend",       HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 58, fx_2_vertexblend_values },
-    { "ClipPlaneEnable",   HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 59, fx_2_clipplane_values },
-    { "PointSize",         HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 60 },
-    { "PointSize_Min",     HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 61 },
-    { "PointSize_Max",     HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 62 },
-    { "PointSpriteEnable", HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 63 },
-    { "PointScaleEnable",  HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 64 },
-    { "PointScale_A",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 65 },
-    { "PointScale_B",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 66 },
-    { "PointScale_C",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 67 },
-
-    { "MultiSampleAntialias",     HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 68 },
-    { "MultiSampleMask",          HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 69 },
-    { "PatchEdgeStyle",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 70, fx_2_patchedgestyle_values },
-    { "DebugMonitorToken",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 71 },
-    { "IndexedVertexBlendEnable", HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 72 },
-    { "ColorWriteEnable",         HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 73, fx_2_colorwriteenable_values },
-    { "TweenFactor",              HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 74 },
-    { "BlendOp",                  HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 75, fx_2_blendop_values },
-    { "PositionDegree",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 76, fx_2_degree_values },
-    { "NormalDegree",             HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 77, fx_2_degree_values },
-    { "ScissorTestEnable",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 78 },
-    { "SlopeScaleDepthBias",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 79 },
-
-    { "AntialiasedLineEnable",     HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 80 },
-    { "MinTessellationLevel",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 81 },
-    { "MaxTessellationLevel",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 82 },
-    { "AdaptiveTess_X",            HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 83 },
-    { "AdaptiveTess_Y",            HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 84 },
-    { "AdaptiveTess_Z",            HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 85 },
-    { "AdaptiveTess_W",            HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 86 },
-    { "EnableAdaptiveTessellation",HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 87 },
-    { "TwoSidedStencilMode",       HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 88 },
-    { "StencilFail",               HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 89, fx_2_stencilcaps_values },
-    { "StencilZFail",              HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 90, fx_2_stencilcaps_values },
-    { "StencilPass",               HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 91, fx_2_stencilcaps_values },
-    { "StencilFunc",               HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 92, fx_2_cmpfunc_values },
-
-    { "ColorWriteEnable1",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 93, fx_2_colorwriteenable_values },
-    { "ColorWriteEnable2",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 94, fx_2_colorwriteenable_values },
-    { "ColorWriteEnable3",        HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 95, fx_2_colorwriteenable_values },
-    { "BlendFactor",              HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 96 },
-    { "SRGBWriteEnable",          HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 97 },
-    { "DepthBias",                HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 98 },
-    { "SeparateAlphaBlendEnable", HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 99 },
-    { "SrcBlendAlpha",            HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 100, fx_2_blendmode_values },
-    { "DestBlendAlpha",           HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 101, fx_2_blendmode_values },
-    { "BlendOpAlpha",             HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 102, fx_2_blendmode_values },
-
-    { "ColorOp",               HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 103, fx_2_textureop_values },
-    { "ColorArg0",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 104, fx_2_colorarg_values },
-    { "ColorArg1",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 105, fx_2_colorarg_values },
-    { "ColorArg2",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 106, fx_2_colorarg_values },
-    { "AlphaOp",               HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 107, fx_2_textureop_values },
-    { "AlphaArg0",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 108, fx_2_colorarg_values },
-    { "AlphaArg1",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 109, fx_2_colorarg_values },
-    { "AlphaArg2",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 110, fx_2_colorarg_values },
-    { "ResultArg",             HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 111, fx_2_colorarg_values },
-    { "BumpEnvMat00",          HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 112 },
-    { "BumpEnvMat01",          HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 113 },
-    { "BumpEnvMat10",          HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 114 },
-    { "BumpEnvMat11",          HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 115 },
-    { "TexCoordIndex",         HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 116 },
-    { "BumpEnvLScale",         HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 117 },
-    { "BumpEnvLOffset",        HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 118 },
-    { "TextureTransformFlags", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 119, fx_2_texturetransform_values },
-    { "Constant",              HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 120 },
-    { "PatchSegments",         HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 121 },
-    { "FVF",                   HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 122 },
-
-    { "ProjectionTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 123 },
-    { "ViewTransform",       HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 124 },
-    { "WorldTransform",      HLSL_CLASS_MATRIX, FX_FLOAT, 4, 256, 125 },
-    { "TextureTransform",    HLSL_CLASS_MATRIX, FX_FLOAT, 4, 8, 126 },
-
-    { "MaterialAmbient",   HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 127 },
-    { "MaterialDiffuse",   HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 128 },
-    { "MaterialSpecular",  HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 129 },
-    { "MaterialEmissive",  HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 130 },
-    { "MaterialPower",     HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 131 },
-
-    { "LightType",         HLSL_CLASS_SCALAR, FX_UINT,  1, ~0u, 132, fx_2_lighttype_values },
-    { "LightDiffuse",      HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 133 },
-    { "LightSpecular",     HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 134 },
-    { "LightAmbient",      HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 135 },
-    { "LightPosition",     HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 136 },
-    { "LightDirection",    HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 137 },
-    { "LightRange",        HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 138 },
-    { "LightFalloff",      HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 139 },
-    { "LightAttenuation0", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 140 },
-    { "LightAttenuation1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 141 },
-    { "LightAttenuation2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 142 },
-    { "LightTheta",        HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 143 },
-    { "LightPhi",          HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 144 },
-    { "LightEnable",       HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 145 },
-
-    { "VertexShader",      HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 },
-    { "PixelShader",       HLSL_CLASS_SCALAR, FX_PIXELSHADER,  1, 1, 147 },
-
-    { "VertexShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 148 },
-    { "VertexShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL,  1, ~0u, 149 },
-    { "VertexShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT,  1, ~0u, 150 },
-    { "VertexShaderConstant",  HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 151 },
-    { "VertexShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 152 },
-    { "VertexShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 153 },
-    { "VertexShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 154 },
-    { "VertexShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 155 },
-
-    { "PixelShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 156 },
-    { "PixelShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL,  1, ~0u, 157 },
-    { "PixelShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT,  1, ~0u, 158 },
-    { "PixelShaderConstant",  HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 159 },
-    { "PixelShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 160 },
-    { "PixelShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 161 },
-    { "PixelShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 162 },
-    { "PixelShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 163 },
-
-    { "Texture",           HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 261, 164 },
-    { "AddressU",          HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 165, fx_2_address_values },
-    { "AddressV",          HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 166, fx_2_address_values },
-    { "AddressW",          HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 167, fx_2_address_values },
-    { "BorderColor",       HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 168 },
-    { "MagFilter",         HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 169, fx_2_filter_values },
-    { "MinFilter",         HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 170, fx_2_filter_values },
-    { "MipFilter",         HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 171, fx_2_filter_values },
-    { "MipMapLodBias",     HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 172 },
-    { "MaxMipLevel",       HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 173 },
-    { "MaxAnisotropy",     HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 174 },
-    { "SRGBTexture",       HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 175 },
-    { "ElementIndex",      HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 176 },
+static const struct fx_state fx_2_pass_states[] =
+{
+    { "ZEnable",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 0, fx_2_zenable_values },
+    { "FillMode",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 1, fx_2_fillmode_values },
+    { "ShadeMode",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 2, fx_2_shademode_values },
+    { "ZWriteEnable",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 3 },
+    { "AlphaTestEnable",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 4 },
+    { "LastPixel",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 5 },
+    { "SrcBlend",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 6, fx_2_blendmode_values },
+    { "DestBlend",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 7, fx_2_blendmode_values },
+    { "CullMode",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 8, fx_2_cullmode_values },
+    { "ZFunc",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 9, fx_2_cmpfunc_values },
+    { "AlphaRef",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 10 },
+    { "AlphaFunc",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 11, fx_2_cmpfunc_values },
+    { "DitherEnable",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 12 },
+    { "AlphaBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 13 },
+    { "FogEnable",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 14 },
+    { "SpecularEnable",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 15 },
+    { "FogColor",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 16 },
+    { "FogTableMode",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 17, fx_2_fogmode_values },
+    { "FogStart",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 18 },
+    { "FogEnd",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 19 },
+    { "FogDensity",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 20 },
+    { "RangeFogEnable",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 21 },
+    { "StencilEnable",    HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 22 },
+    { "StencilFail",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 23, fx_2_stencilcaps_values },
+    { "StencilZFail",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 24, fx_2_stencilcaps_values },
+    { "StencilPass",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 25, fx_2_stencilcaps_values },
+    { "StencilFunc",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 26, fx_2_cmpfunc_values },
+    { "StencilRef",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 27 },
+    { "StencilMask",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 28 },
+    { "StencilWriteMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 29 },
+    { "TextureFactor",    HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 30 },
+    { "Wrap0",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 31, fx_2_wrap_values },
+    { "Wrap1",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 32, fx_2_wrap_values },
+    { "Wrap2",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 33, fx_2_wrap_values },
+    { "Wrap3",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 34, fx_2_wrap_values },
+    { "Wrap4",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 35, fx_2_wrap_values },
+    { "Wrap5",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 36, fx_2_wrap_values },
+    { "Wrap6",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 37, fx_2_wrap_values },
+    { "Wrap7",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 38, fx_2_wrap_values },
+    { "Wrap8",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 39, fx_2_wrap_values },
+    { "Wrap9",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 40, fx_2_wrap_values },
+    { "Wrap10",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 41, fx_2_wrap_values },
+    { "Wrap11",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 42, fx_2_wrap_values },
+    { "Wrap12",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 43, fx_2_wrap_values },
+    { "Wrap13",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 44, fx_2_wrap_values },
+    { "Wrap14",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 45, fx_2_wrap_values },
+    { "Wrap15",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 46, fx_2_wrap_values },
+    { "Clipping",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 47 },
+    { "Lighting",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 48 },
+    { "Ambient",          HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 49 },
+    { "FogVertexMode",    HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 50, fx_2_fogmode_values },
+    { "ColorVertex",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 51 },
+    { "LocalViewer",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 52 },
+    { "NormalizeNormals", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 53 },
+
+    { "DiffuseMaterialSource",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 54, fx_2_materialcolorsource_values },
+    { "SpecularMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 55, fx_2_materialcolorsource_values },
+    { "AmbientMaterialSource",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 56, fx_2_materialcolorsource_values },
+    { "EmissiveMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 57, fx_2_materialcolorsource_values },
+
+    { "VertexBlend",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 58, fx_2_vertexblend_values },
+    { "ClipPlaneEnable",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 59, fx_2_clipplane_values },
+    { "PointSize",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 60 },
+    { "PointSize_Min",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 61 },
+    { "PointSize_Max",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 62 },
+    { "PointSpriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 63 },
+    { "PointScaleEnable",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 64 },
+    { "PointScale_A",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 65 },
+    { "PointScale_B",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 66 },
+    { "PointScale_C",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 67 },
+
+    { "MultiSampleAntialias",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 68 },
+    { "MultiSampleMask",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 69 },
+    { "PatchEdgeStyle",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 70, fx_2_patchedgestyle_values },
+    { "DebugMonitorToken",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 71 },
+    { "IndexedVertexBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 72 },
+    { "ColorWriteEnable",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 73, fx_2_colorwriteenable_values },
+    { "TweenFactor",              HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 74 },
+    { "BlendOp",                  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 75, fx_2_blendop_values },
+    { "PositionDegree",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 76, fx_2_degree_values },
+    { "NormalDegree",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 77, fx_2_degree_values },
+    { "ScissorTestEnable",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 78 },
+    { "SlopeScaleDepthBias",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 79 },
+
+    { "AntialiasedLineEnable",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 80 },
+    { "MinTessellationLevel",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 81 },
+    { "MaxTessellationLevel",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 82 },
+    { "AdaptiveTess_X",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 83 },
+    { "AdaptiveTess_Y",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 84 },
+    { "AdaptiveTess_Z",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 85 },
+    { "AdaptiveTess_W",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 86 },
+    { "EnableAdaptiveTessellation",HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 87 },
+    { "TwoSidedStencilMode",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 88 },
+    { "StencilFail",               HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 89, fx_2_stencilcaps_values },
+    { "StencilZFail",              HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 90, fx_2_stencilcaps_values },
+    { "StencilPass",               HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 91, fx_2_stencilcaps_values },
+    { "StencilFunc",               HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 92, fx_2_cmpfunc_values },
+
+    { "ColorWriteEnable1",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 93, fx_2_colorwriteenable_values },
+    { "ColorWriteEnable2",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 94, fx_2_colorwriteenable_values },
+    { "ColorWriteEnable3",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 95, fx_2_colorwriteenable_values },
+    { "BlendFactor",              HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 96 },
+    { "SRGBWriteEnable",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 97 },
+    { "DepthBias",                HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 98 },
+    { "SeparateAlphaBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 99 },
+    { "SrcBlendAlpha",            HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 100, fx_2_blendmode_values },
+    { "DestBlendAlpha",           HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 101, fx_2_blendmode_values },
+    { "BlendOpAlpha",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, 1, 102, fx_2_blendmode_values },
+
+    { "ColorOp",               HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 103, fx_2_textureop_values },
+    { "ColorArg0",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 104, fx_2_colorarg_values },
+    { "ColorArg1",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 105, fx_2_colorarg_values },
+    { "ColorArg2",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 106, fx_2_colorarg_values },
+    { "AlphaOp",               HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 107, fx_2_textureop_values },
+    { "AlphaArg0",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 108, fx_2_colorarg_values },
+    { "AlphaArg1",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 109, fx_2_colorarg_values },
+    { "AlphaArg2",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 110, fx_2_colorarg_values },
+    { "ResultArg",             HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 111, fx_2_colorarg_values },
+    { "BumpEnvMat00",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 112 },
+    { "BumpEnvMat01",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 113 },
+    { "BumpEnvMat10",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 114 },
+    { "BumpEnvMat11",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 115 },
+    { "TexCoordIndex",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 116 },
+    { "BumpEnvLScale",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 117 },
+    { "BumpEnvLOffset",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 118 },
+    { "TextureTransformFlags", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 119, fx_2_texturetransform_values },
+    { "Constant",              HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 120 },
+    { "PatchSegments",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 121 },
+    { "FVF",                   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 122 },
+
+    { "ProjectionTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 123 },
+    { "ViewTransform",       HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 124 },
+    { "WorldTransform",      HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 256, 125 },
+    { "TextureTransform",    HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 8, 126 },
+
+    { "MaterialDiffuse",   HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 127 },
+    { "MaterialAmbient",   HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 128 },
+    { "MaterialSpecular",  HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 129 },
+    { "MaterialEmissive",  HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 130 },
+    { "MaterialPower",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 131 },
+
+    { "LightType",         HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, ~0u, 132, fx_2_lighttype_values },
+    { "LightDiffuse",      HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 133 },
+    { "LightSpecular",     HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 134 },
+    { "LightAmbient",      HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 135 },
+    { "LightPosition",     HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 136 },
+    { "LightDirection",    HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 137 },
+    { "LightRange",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 138 },
+    { "LightFalloff",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 139 },
+    { "LightAttenuation0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 140 },
+    { "LightAttenuation1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 141 },
+    { "LightAttenuation2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 142 },
+    { "LightTheta",        HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 143 },
+    { "LightPhi",          HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 144 },
+    { "LightEnable",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 145 },
+
+    { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 },
+    { "PixelShader",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER,  1, 1, 147 },
+
+    { "VertexShaderConstantF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 148 },
+    { "VertexShaderConstantB", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BOOL,  1, ~0u, 149 },
+    { "VertexShaderConstantI", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, ~0u, 150 },
+    { "VertexShaderConstant",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 151 },
+    { "VertexShaderConstant1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 152 },
+    { "VertexShaderConstant2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 153 },
+    { "VertexShaderConstant3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 154 },
+    { "VertexShaderConstant4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 155 },
+
+    { "PixelShaderConstantF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 156 },
+    { "PixelShaderConstantB", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BOOL,  1, ~0u, 157 },
+    { "PixelShaderConstantI", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,  1, ~0u, 158 },
+    { "PixelShaderConstant",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 159 },
+    { "PixelShaderConstant1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 160 },
+    { "PixelShaderConstant2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 161 },
+    { "PixelShaderConstant3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 162 },
+    { "PixelShaderConstant4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 163 },
+
+    { "Texture",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 261, 164 },
+    { "AddressU",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 165, fx_2_address_values },
+    { "AddressV",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 166, fx_2_address_values },
+    { "AddressW",      HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 167, fx_2_address_values },
+    { "BorderColor",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 168 },
+    { "MagFilter",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 169, fx_2_filter_values },
+    { "MinFilter",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 170, fx_2_filter_values },
+    { "MipFilter",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 171, fx_2_filter_values },
+    { "MipMapLodBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 172 },
+    { "MaxMipLevel",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 173 },
+    { "MaxAnisotropy", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 174 },
+    { "SRGBTexture",   HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 175 },
+    { "ElementIndex",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT,    1, 261, 176 },
 };
 
-static const struct fx_2_state fx_2_sampler_states[] =
-{
-    { "Texture",           HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 },
-    { "AddressU",          HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 165, fx_2_address_values },
-    { "AddressV",          HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 166, fx_2_address_values },
-    { "AddressW",          HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 167, fx_2_address_values },
-    { "BorderColor",       HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 168 },
-    { "MagFilter",         HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 169, fx_2_filter_values },
-    { "MinFilter",         HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 170, fx_2_filter_values },
-    { "MipFilter",         HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 171, fx_2_filter_values },
-    { "MipMapLodBias",     HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 172 },
-    { "MaxMipLevel",       HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 173 },
-    { "MaxAnisotropy",     HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 174 },
-    { "SRGBTexture",       HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 175 },
-    { "ElementIndex",      HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 176 },
+static const struct fx_state fx_2_sampler_states[] =
+{ /* Same rows as the removed table, with an HLSL_CLASS_SAMPLER column added after the name. */
+    { "Texture",       HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 },
+    { "AddressU",      HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 165, fx_2_address_values },
+    { "AddressV",      HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 166, fx_2_address_values },
+    { "AddressW",      HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 167, fx_2_address_values },
+    { "BorderColor",   HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 168 },
+    { "MagFilter",     HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 169, fx_2_filter_values },
+    { "MinFilter",     HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 170, fx_2_filter_values },
+    { "MipFilter",     HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 171, fx_2_filter_values },
+    { "MipMapLodBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 172 },
+    { "MaxMipLevel",   HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 173 },
+    { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 174 },
+    { "SRGBTexture",   HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 175 },
+    { "ElementIndex",  HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT,    1, 1, 176 },
 };
 
 static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx)
@@ -1979,12 +1994,21 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte
     put_u32(buffer, value_offset);
 }
 
+static void write_fx_2_state_assignment(const struct hlsl_ir_var *var,
+        struct hlsl_state_block_entry *entry, struct fx_write_context *fx)
+{ /* fx_2_0 stub: writes nothing and leaves 'entry' unused; it only raises a fixme at var->loc. */
+    struct hlsl_ctx *ctx = fx->ctx;
+
+    hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 state assignments is not implemented.");
+}
+
 static const struct fx_write_context_ops fx_2_ops =
 {
     .write_string = write_fx_2_string,
     .write_technique = write_fx_2_technique,
     .write_pass = write_fx_2_pass,
     .write_annotation = write_fx_2_annotation,
+    .write_state_assignment = write_fx_2_state_assignment, /* Currently only raises a fixme. */
 };
 
 static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out)
@@ -2047,12 +2071,16 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out)
     return fx_write_context_cleanup(&fx);
 }
 
+static void write_fx_4_state_assignment(const struct hlsl_ir_var *var,
+        struct hlsl_state_block_entry *entry, struct fx_write_context *fx); /* Needed by fx_4_ops below. */
+
 static const struct fx_write_context_ops fx_4_ops =
 {
     .write_string = write_fx_4_string,
     .write_technique = write_fx_4_technique,
     .write_pass = write_fx_4_pass,
     .write_annotation = write_fx_4_annotation,
+    .write_state_assignment = write_fx_4_state_assignment, /* Same signature as the fx_2 callback. */
     .are_child_effects_supported = true,
 };
 
@@ -2660,18 +2688,7 @@ static const struct rhs_named_value null_values[] =
     { NULL }
 };
 
-static const struct fx_4_state
-{
-    const char *name;
-    enum hlsl_type_class container;
-    enum hlsl_type_class class;
-    enum state_property_component_type type;
-    unsigned int dimx;
-    unsigned int array_size;
-    int id;
-    const struct rhs_named_value *values;
-}
-fx_4_states[] =
+static const struct fx_state fx_4_states[] =
 {
     { "RasterizerState",       HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER,       1, 1, 0 },
     { "DepthStencilState",     HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL,     1, 1, 1 },
@@ -2739,7 +2756,7 @@ fx_4_states[] =
     { "ComputeShader",  HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 },
 };
 
-static const struct fx_4_state fx_5_blend_states[] =
+static const struct fx_state fx_5_blend_states[] =
 {
     { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 1, 36, bool_values },
     { "BlendEnable",           HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL,  1, 8, 37, bool_values },
@@ -2752,45 +2769,61 @@ static const struct fx_4_state fx_5_blend_states[] =
     { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 },
 };
 
-struct fx_4_state_table
+struct fx_state_table
 {
-    const struct fx_4_state *ptr;
+    const struct fx_state *ptr; /* First entry of one of the static state arrays; 'count' is its length. */
     unsigned int count;
 };
 
-static struct fx_4_state_table fx_4_get_state_table(enum hlsl_type_class type_class,
+static struct fx_state_table fx_get_state_table(enum hlsl_type_class type_class,
         unsigned int major, unsigned int minor)
 {
-    struct fx_4_state_table table;
+    struct fx_state_table table; /* Every branch below sets both ptr and count. */
 
-    if (type_class == HLSL_CLASS_BLEND_STATE && (major == 5 || (major == 4 && minor == 1)))
+    if (major == 2) /* fx_2_0 tables; 'minor' is not consulted on this path. */
     {
-        table.ptr = fx_5_blend_states;
-        table.count = ARRAY_SIZE(fx_5_blend_states);
+        if (type_class == HLSL_CLASS_PASS)
+        {
+            table.ptr = fx_2_pass_states;
+            table.count = ARRAY_SIZE(fx_2_pass_states);
+        }
+        else /* Every non-pass class gets the sampler table. */
+        {
+            table.ptr = fx_2_sampler_states;
+            table.count = ARRAY_SIZE(fx_2_sampler_states);
+        }
     }
     else
     {
-        table.ptr = fx_4_states;
-        table.count = ARRAY_SIZE(fx_4_states);
+        if (type_class == HLSL_CLASS_BLEND_STATE && (major == 5 || (major == 4 && minor == 1)))
+        { /* Blend states for major 5 and for 4.1 use the dedicated fx_5 table. */
+            table.ptr = fx_5_blend_states;
+            table.count = ARRAY_SIZE(fx_5_blend_states);
+        }
+        else /* All remaining class/version combinations share fx_4_states. */
+        {
+            table.ptr = fx_4_states;
+            table.count = ARRAY_SIZE(fx_4_states);
+        }
     }
 
     return table;
 }
 
-static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var,
+static void resolve_fx_state_block_values(struct hlsl_ir_var *var,
         struct hlsl_state_block_entry *entry, struct fx_write_context *fx)
 {
     const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type);
     struct replace_state_context replace_context;
-    const struct fx_4_state *state = NULL;
+    const struct fx_state *state = NULL;
     struct hlsl_type *state_type = NULL;
     struct hlsl_ctx *ctx = fx->ctx;
     enum hlsl_base_type base_type;
-    struct fx_4_state_table table;
+    struct fx_state_table table;
     struct hlsl_ir_node *node;
     unsigned int i;
 
-    table = fx_4_get_state_table(type->class, ctx->profile->major_version, ctx->profile->minor_version);
+    table = fx_get_state_table(type->class, ctx->profile->major_version, ctx->profile->minor_version);
 
     for (i = 0; i < table.count; ++i)
     {
@@ -3076,21 +3109,34 @@ static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct h
     return decompose_fx_4_state_block_expand_array(var, block, entry_index, fx);
 }
 
-static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index,
-        uint32_t count_offset, struct fx_write_context *fx)
+static void fx_4_decompose_state_blocks(struct hlsl_ir_var *var, struct fx_write_context *fx)
 {
-    struct vkd3d_bytecode_buffer *buffer = &fx->structured;
+    unsigned int block_count = hlsl_get_multiarray_size(var->data_type); /* Bounds the state_blocks index. */
     struct hlsl_state_block *block;
-    uint32_t i, count = 0;
 
-    if (var->state_blocks)
+    if (!var->state_blocks) /* Nothing to decompose. */
+        return;
+
+    for (unsigned int i = 0; i < block_count; ++i)
     {
-        block = var->state_blocks[block_index];
+        block = var->state_blocks[i];
 
-        for (i = 0; i < block->count;)
+        for (unsigned int j = 0; j < block->count;) /* No ++j: the callee's return value advances j. */
         {
-            i += decompose_fx_4_state_block(var, block, i, fx);
+            j += decompose_fx_4_state_block(var, block, j, fx);
         }
+    }
+}
+
+static uint32_t write_state_block(struct hlsl_ir_var *var, unsigned int block_index,
+        struct fx_write_context *fx)
+{
+    struct hlsl_state_block *block;
+    uint32_t i, count = 0;
+
+    if (var->state_blocks)
+    {
+        block = var->state_blocks[block_index];
 
         for (i = 0; i < block->count; ++i)
         {
@@ -3101,27 +3147,29 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i
                 continue;
 
             /* Resolve special constant names and property names. */
-            resolve_fx_4_state_block_values(var, entry, fx);
+            resolve_fx_state_block_values(var, entry, fx);
 
-            write_fx_4_state_assignment(var, entry, fx);
+            write_state_assignment(var, entry, fx);
             ++count;
         }
     }
 
-    set_u32(buffer, count_offset, count);
+    return count;
 }
 
 static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx)
 {
     uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i;
     struct vkd3d_bytecode_buffer *buffer = &fx->structured;
-    uint32_t count_offset;
+    uint32_t count_offset, count;
+
+    fx_4_decompose_state_blocks(var, fx); /* Decompose every element's block before any is written. */
 
     for (i = 0; i < elements_count; ++i)
     {
         count_offset = put_u32(buffer, 0);
-
-        write_fx_4_state_block(var, i, count_offset, fx);
+        count = write_state_block(var, i, fx);
+        set_u32(buffer, count_offset, count); /* Overwrite the 0 placeholder with the returned count. */
     }
 }
 
@@ -3977,17 +4025,13 @@ static void fx_parse_fx_2_annotations(struct fx_parser *parser, uint32_t count)
     vkd3d_string_buffer_printf(&parser->buffer, ">");
 }
 
-static const struct fx_2_state *fx_2_get_state_by_id(enum hlsl_type_class container, uint32_t id)
+static const struct fx_state *fx_2_get_state_by_id(enum hlsl_type_class container, uint32_t id)
 {
-    const struct fx_2_state *table;
-    unsigned int count;
-
-    count = container == HLSL_CLASS_PASS ? ARRAY_SIZE(fx_2_pass_states) : ARRAY_SIZE(fx_2_sampler_states);
-    table = container == HLSL_CLASS_PASS ? fx_2_pass_states : fx_2_sampler_states;
+    struct fx_state_table table = fx_get_state_table(container, 2, 0); /* major 2 selects the fx_2 tables. */
 
     /* State identifiers are sequential, no gaps */
-    if (id >= table[0].id && id <= table[count - 1].id)
-        return &table[id - table[0].id];
+    if (id >= table.ptr[0].id && id <= table.ptr[table.count - 1].id)
+        return &table.ptr[id - table.ptr[0].id]; /* Direct index: id minus the first entry's id. */
 
     return NULL;
 }
@@ -3996,7 +4040,7 @@ static void fx_parse_fx_2_assignment(struct fx_parser *parser, enum hlsl_type_cl
         const struct fx_assignment *entry)
 {
     const struct rhs_named_value *named_value = NULL;
-    const struct fx_2_state *state;
+    const struct fx_state *state;
 
     if ((state = fx_2_get_state_by_id(container, entry->id)))
     {
@@ -4700,7 +4744,7 @@ static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type)
 
 static int fx_4_state_id_compare(const void *a, const void *b)
 {
-    const struct fx_4_state *state = b;
+    const struct fx_state *state = b; /* 'b' is the table element; 'a' points to the int id searched for. */
     int id = *(int *)a;
 
     return id - state->id;
 }
@@ -5186,12 +5230,12 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32
     };
     const struct rhs_named_value *named_value;
     struct fx_5_shader shader = { 0 };
-    struct fx_4_state_table table;
+    struct fx_state_table table;
     unsigned int shader_type = 0;
     uint32_t i, j, comp_count;
-    struct fx_4_state *state;
+    struct fx_state *state;
 
-    table = fx_4_get_state_table(type_class, parser->version.major, parser->version.minor);
+    table = fx_get_state_table(type_class, parser->version.major, parser->version.minor);
 
     for (i = 0; i < count; ++i)
     {
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 2b88a04a120..73cd4da906a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -3531,21 +3531,7 @@ static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_der
 
 const char *debug_hlsl_writemask(unsigned int writemask)
 {
-    static const char components[] = {'x', 'y', 'z', 'w'};
-    char string[5];
-    unsigned int i = 0, pos = 0;
-
-    VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL));
-
-    while (writemask)
-    {
-        if (writemask & 1)
-            string[pos++] = components[i];
-        writemask >>= 1;
-        i++;
-    }
-    string[pos] = '\0';
-    return vkd3d_dbg_sprintf(".%s", string);
+    return debug_vsir_writemask(writemask); /* Replaces the local formatter removed by this hunk. */
 }
 
 const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size)
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index bb37f0be6cf..369181cada8 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -59,8 +59,7 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned
 
 static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component)
 {
-    *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx));
-    *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx);
+    vsir_swizzle_set_component(swizzle, idx, component); /* Forwards all three arguments unchanged. */
 }
 
 enum hlsl_type_class
@@ -309,6 +308,8 @@ struct hlsl_reg
     unsigned int writemask;
     /* Whether the register has been allocated. */
     bool allocated;
+    /* Currently only used for numeric registers. */
+    enum vkd3d_shader_register_type type;
 };
 
 /* Types of instruction nodes for the IR.
@@ -1187,6 +1188,8 @@ struct hlsl_ctx
     } constant_defs;
     /* 'c' registers where the constants expected by SM2 sincos are stored. */
     struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
+    /* Number of allocated SSA IDs, used in translation to vsir. */
+    unsigned int ssa_count;
 
     /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in
      *   compute shader profiles. It is set using the numthreads() attribute in the entry point. */
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index d4e29e16b7c..04bb2d98b26 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -5656,6 +5656,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
                 unsigned int writemask = hlsl_combine_writemasks(available_writemask,
                         vkd3d_write_mask_from_component_count(reg_size));
 
+                ret.type = VKD3DSPR_TEMP;
                 ret.id = reg_idx;
                 ret.writemask = hlsl_combine_writemasks(writemask,
                         vkd3d_write_mask_from_component_count(component_count));
@@ -5666,6 +5667,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
         }
     }
 
+    ret.type = VKD3DSPR_TEMP;
     ret.id = allocator->reg_count;
     ret.writemask = vkd3d_write_mask_from_component_count(component_count);
     record_allocation(ctx, allocator, allocator->reg_count,
@@ -5692,6 +5694,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx,
 
     record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip);
 
+    ret.type = VKD3DSPR_TEMP;
     ret.id = reg_idx;
     ret.allocation_size = 1;
     ret.writemask = writemask;
@@ -5737,6 +5740,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo
         record_allocation(ctx, allocator, reg_idx + (reg_size / 4),
                 (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip);
 
+    ret.type = VKD3DSPR_TEMP;
     ret.id = reg_idx;
     ret.allocation_size = align(reg_size, 4) / 4;
     ret.allocated = true;
@@ -5757,20 +5761,30 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx,
         return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false);
 }
 
-static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type)
+static const char *debug_register(struct hlsl_reg reg, const struct hlsl_type *type)
 {
     static const char writemask_offset[] = {'w','x','y','z'};
     unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
+    const char *class = "r"; /* Prefix for any reg.type not matched below; callers no longer pass it. */
+
+    if (reg.type == VKD3DSPR_CONST)
+        class = "c";
+    else if (reg.type == VKD3DSPR_INPUT)
+        class = "v";
+    else if (reg.type == VKD3DSPR_OUTPUT)
+        class = "o";
+    else if (reg.type == VKD3DSPR_SSA)
+        class = "sr"; /* Two characters, hence %s instead of %c in the formats below. */
 
     if (reg_size > 4 && !hlsl_type_is_patch_array(type))
     {
         if (reg_size & 3)
-            return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, reg.id + (reg_size / 4),
+            return vkd3d_dbg_sprintf("%s%u-%s%u.%c", class, reg.id, class, reg.id + (reg_size / 4),
                     writemask_offset[reg_size & 3]);
 
-        return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, reg.id + (reg_size / 4) - 1);
+        return vkd3d_dbg_sprintf("%s%u-%s%u", class, reg.id, class, reg.id + (reg_size / 4) - 1);
     }
-    return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask));
+    return vkd3d_dbg_sprintf("%s%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask));
 }
 
 static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
@@ -5910,11 +5924,12 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
         struct hlsl_ir_node *instr, struct register_allocator *allocator)
 {
     unsigned int reg_writemask = 0, dst_writemask = 0;
+    bool is_per_component = false;
 
     if (instr->reg.allocated || !instr->last_read)
         return;
 
-    if (instr->type == HLSL_IR_EXPR)
+    if (instr->type == HLSL_IR_EXPR && ctx->profile->major_version < 4)
     {
         switch (hlsl_ir_expr(instr)->op)
         {
@@ -5928,20 +5943,42 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
                 reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_1;
                 break;
 
+            case HLSL_OP1_EXP2:
+            case HLSL_OP1_LOG2:
+            case HLSL_OP1_RCP:
+            case HLSL_OP1_RSQ:
+                /* These ops can only be written one component at a time in sm1,
+                 * so it'll take more than one instruction to fill the variable
+                 * and thus we can't use an SSA.
+                 * FIXME: We should probably handle this by splitting at the vsir
+                 * level instead. */
+                is_per_component = true;
+                break;
+
             default:
                 break;
         }
     }
 
+    VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR);
+
     if (reg_writemask)
-        instr->reg = allocate_register_with_masks(ctx, allocator, instr->index,
-                instr->last_read, reg_writemask, dst_writemask, 0, false);
-    else
+        instr->reg = allocate_register_with_masks(ctx, allocator,
+                instr->index, instr->last_read, reg_writemask, dst_writemask, 0, false);
+    else if (is_per_component)
         instr->reg = allocate_numeric_registers_for_type(ctx, allocator,
                 instr->index, instr->last_read, instr->data_type);
+    else
+    {
+        instr->reg.writemask = vkd3d_write_mask_from_component_count(instr->data_type->e.numeric.dimx);
+        instr->reg.allocation_size = 1;
+        instr->reg.allocated = true;
+        instr->reg.type = VKD3DSPR_SSA;
+        instr->reg.id = ctx->ssa_count++;
+    }
 
     TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index,
-            debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read);
+            debug_register(instr->reg, instr->data_type), instr->index, instr->last_read);
 }
 
 static void allocate_variable_temp_register(struct hlsl_ctx *ctx,
@@ -5966,8 +6003,8 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx,
             var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator,
                     var->first_write, var->last_read, var->data_type);
 
-            TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r',
-                    var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read);
+            TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name,
+                    debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read);
         }
     }
 }
@@ -6051,6 +6088,7 @@ static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int cou
             if ((reg->allocated_mask & writemask) == writemask
                     && !memcmp(f, &reg->value.f[j], count * sizeof(float)))
             {
+                ret->type = VKD3DSPR_CONST;
                 ret->id = reg->index;
                 ret->allocation_size = 1;
                 ret->writemask = writemask;
@@ -6144,12 +6182,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
                 if (find_constant(ctx, f, type->e.numeric.dimx, &constant->reg))
                 {
                     TRACE("Reusing already allocated constant %s for @%u.\n",
-                            debug_register('c', constant->reg, type), instr->index);
+                            debug_register(constant->reg, type), instr->index);
                     break;
                 }
 
                 constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
-                TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
+                constant->reg.type = VKD3DSPR_CONST;
+                TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register(constant->reg, type));
 
                 for (unsigned int x = 0, i = 0; x < 4; ++x)
                 {
@@ -6246,14 +6285,16 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl
             type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4);
 
             ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
-            TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type));
+            ctx->d3dsincosconst1.type = VKD3DSPR_CONST;
+            TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register(ctx->d3dsincosconst1, type));
             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc);
             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc);
             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2,  2.60416674e-03f, &instr->loc);
             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3,  2.60416680e-04f, &instr->loc);
 
             ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
-            TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type));
+            ctx->d3dsincosconst2.type = VKD3DSPR_CONST;
+            TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register(ctx->d3dsincosconst2, type));
             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc);
             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc);
             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2,  1.00000000e+00f, &instr->loc);
@@ -6301,12 +6342,13 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
                 record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
             }
 
+            var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST;
             var->regs[HLSL_REGSET_NUMERIC].id = reg_idx;
             var->regs[HLSL_REGSET_NUMERIC].allocation_size = reg_size / 4;
             var->regs[HLSL_REGSET_NUMERIC].writemask = VKD3DSP_WRITEMASK_ALL;
             var->regs[HLSL_REGSET_NUMERIC].allocated = true;
             TRACE("Allocated reserved %s to %s.\n", var->name,
-                    debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
+                    debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type));
         }
     }
 
@@ -6322,8 +6364,9 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
         if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
         {
             var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false);
+            var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST;
             TRACE("Allocated %s to %s.\n", var->name,
-                    debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
+                    debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type));
         }
     }
 
@@ -6519,9 +6562,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
 
         var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX,
                 reg_size, component_count, mode, var->force_align, vip_allocation);
+        var->regs[HLSL_REGSET_NUMERIC].type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT;
 
-        TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v',
-                var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode);
+        TRACE("Allocated %s to %s (mode %d).\n", var->name,
+                debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode);
     }
 }
 
@@ -7733,8 +7777,6 @@ static void validate_and_record_stream_outputs(struct hlsl_ctx *ctx)
             reported_invalid_index = true;
         }
     }
-
-    /* TODO: check that maxvertexcount * outputdatasize <= 1024. */
 }
 
 static void validate_max_output_size(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
@@ -8376,7 +8418,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
     }
     else
     {
-        vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
+        vsir_register_init(&src->reg, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
         src->reg.idx[0].offset = instr->reg.id;
         src->reg.dimension = VSIR_DIMENSION_VEC4;
         src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask);
@@ -8657,7 +8699,7 @@ static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
         struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
 {
     VKD3D_ASSERT(instr->reg.allocated);
-    vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
+    vsir_dst_param_init(dst, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
     dst->reg.idx[0].offset = instr->reg.id;
     dst->reg.dimension = VSIR_DIMENSION_VEC4;
     dst->write_mask = instr->reg.writemask;
@@ -8767,13 +8809,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx
                 return;
 
             dst_param = &ins->dst[0];
-            vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+            vsir_register_init(&dst_param->reg, instr->reg.type, VKD3D_DATA_FLOAT, 1);
             dst_param->reg.idx[0].offset = instr->reg.id;
             dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
             dst_param->write_mask = 1u << i;
 
             src_param = &ins->src[0];
-            vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+            vsir_register_init(&src_param->reg, operand->reg.type, VKD3D_DATA_FLOAT, 1);
             src_param->reg.idx[0].offset = operand->reg.id;
             src_param->reg.dimension = VSIR_DIMENSION_VEC4;
             c = vsir_swizzle_get_component(src_swizzle, i);
@@ -9372,7 +9414,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
 
     src_param = &ins->src[0];
     VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT);
-    vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1);
+    vsir_register_init(&src_param->reg, val->reg.type, vsir_data_type_from_hlsl_instruction(ctx, val), 1);
     src_param->reg.idx[0].offset = val->reg.id;
     src_param->reg.dimension = VSIR_DIMENSION_VEC4;
     src_param->swizzle = swizzle;
@@ -9528,6 +9570,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
         return;
     }
 
+    program->ssa_count = 0;
     program->temp_count = allocate_temp_registers(ctx, entry_func);
     if (ctx->result)
         return;
@@ -9540,6 +9583,8 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
     list_move_head(&entry_func->body.instrs, &block.instrs);
 
     sm1_generate_vsir_block(ctx, &entry_func->body, program);
+
+    program->ssa_count = ctx->ssa_count;
 }
 
 D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
@@ -12323,6 +12368,8 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
         }
     }
 
+    program->ssa_count = 0;
+
     if (version.type == VKD3D_SHADER_TYPE_HULL)
         generate_vsir_add_program_instruction(ctx, program,
                 &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0);
@@ -12336,6 +12383,8 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
 
     generate_vsir_scan_required_features(ctx, program);
     generate_vsir_scan_global_flags(ctx, program, func);
+
+    program->ssa_count = ctx->ssa_count;
 }
 
 /* For some reason, for matrices, values from default value initializers end
@@ -13523,6 +13572,19 @@ static void process_entry_function(struct hlsl_ctx *ctx,
     lower_ir(ctx, lower_matrix_swizzles, body);
     lower_ir(ctx, lower_index_loads, body);
 
+    if (entry_func->return_var)
+    {
+        if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
+            hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+                    "Geometry shaders cannot return values.");
+        else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT
+                && !entry_func->return_var->semantic.name)
+            hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
+                    "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
+
+        append_output_var_copy(ctx, entry_func, entry_func->return_var);
+    }
+
     for (i = 0; i < entry_func->parameters.count; ++i)
     {
         var = entry_func->parameters.vars[i];
@@ -13627,18 +13689,9 @@ static void process_entry_function(struct hlsl_ctx *ctx,
             }
         }
     }
+
     if (entry_func->return_var)
     {
-        if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
-            hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
-                    "Geometry shaders cannot return values.");
-        else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT
-                && !entry_func->return_var->semantic.name)
-            hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
-                    "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
-
-        append_output_var_copy(ctx, entry_func, entry_func->return_var);
-
         if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func)
             ctx->output_control_point_type = entry_func->return_var->data_type;
     }
@@ -13882,6 +13935,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
                 return ctx->result;
             }
 
+            vsir_program_trace(&program);
+
             result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context);
             vsir_program_cleanup(&program);
             vkd3d_shader_free_shader_code(&ctab);
@@ -13907,6 +13962,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
                 return ctx->result;
             }
 
+            vsir_program_trace(&program);
+
             result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context);
             vsir_program_cleanup(&program);
             vkd3d_shader_free_shader_code(&rdef);
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 1429c3a8778..8489d0b5ecb 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -503,6 +503,53 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature,
     return false;
 }
 
+const char *debug_vsir_writemask(unsigned int writemask)
+{
+    static const char components[] = {'x', 'y', 'z', 'w'};
+    char string[5];
+    unsigned int i = 0, pos = 0;
+
+    VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL));
+
+    while (writemask)
+    {
+        if (writemask & 1)
+            string[pos++] = components[i];
+        writemask >>= 1;
+        i++;
+    }
+    string[pos] = '\0';
+    return vkd3d_dbg_sprintf(".%s", string);
+}
+
+static unsigned int vsir_combine_write_masks(unsigned int first, unsigned int second)
+{
+    unsigned int ret = 0, j = 0;
+
+    for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i)
+    {
+        if (first & (1u << i))
+        {
+            if (second & (1u << j++))
+                ret |= (1u << i);
+        }
+    }
+
+    return ret;
+}
+
+static uint32_t vsir_combine_swizzles(uint32_t first, uint32_t second)
+{
+    uint32_t ret = 0;
+
+    for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i)
+    {
+        unsigned int s = vsir_swizzle_get_component(second, i);
+        vsir_swizzle_set_component(&ret, i, vsir_swizzle_get_component(first, s));
+    }
+    return ret;
+}
+
 void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type,
         enum vkd3d_data_type data_type, unsigned int idx_count)
 {
@@ -7725,6 +7772,892 @@ static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *pro
     return VKD3D_OK;
 }
 
+
+/* Distinguishes between instruction sources which are masked, where the used
+ * components of the source are determined by the write mask, and sources which
+ * are not masked, where the used components are pre-defined.
+ *
+ * E.g. "add r0.yz, r1.xyzw, r2.xyzw" uses the .yz components of r1 and r2, and
+ * therefore those sources are considered "masked", but
+ * "dp3 r0.y, r1.xyzw, r2.xyzw" uses the .xyz components. */
+static bool vsir_src_is_masked(enum vkd3d_shader_opcode opcode, unsigned int src_idx)
+{
+    switch (opcode)
+    {
+        case VKD3DSIH_ABS:
+        case VKD3DSIH_ACOS:
+        case VKD3DSIH_ADD:
+        case VKD3DSIH_AND:
+        case VKD3DSIH_ASIN:
+        case VKD3DSIH_ATAN:
+        case VKD3DSIH_BFI:
+        case VKD3DSIH_BFREV:
+        case VKD3DSIH_CMP:
+        case VKD3DSIH_CND:
+        case VKD3DSIH_COS:
+        case VKD3DSIH_COUNTBITS:
+        case VKD3DSIH_DADD: /* NB: These are masked, but the mask is double-sized. */
+        case VKD3DSIH_DDIV:
+        case VKD3DSIH_DFMA:
+        case VKD3DSIH_DIV:
+        case VKD3DSIH_DMAX:
+        case VKD3DSIH_DMIN:
+        case VKD3DSIH_DMOV:
+        case VKD3DSIH_DMOVC:
+        case VKD3DSIH_DMUL:
+        case VKD3DSIH_DRCP:
+        case VKD3DSIH_DSX:
+        case VKD3DSIH_DSX_COARSE:
+        case VKD3DSIH_DSX_FINE:
+        case VKD3DSIH_DSY:
+        case VKD3DSIH_DSY_COARSE:
+        case VKD3DSIH_DSY_FINE:
+        case VKD3DSIH_EQO:
+        case VKD3DSIH_EQU:
+        case VKD3DSIH_EXP:
+        case VKD3DSIH_EXPP:
+        case VKD3DSIH_F16TOF32:
+        case VKD3DSIH_F32TOF16:
+        case VKD3DSIH_FIRSTBIT_HI:
+        case VKD3DSIH_FIRSTBIT_LO:
+        case VKD3DSIH_FIRSTBIT_SHI:
+        case VKD3DSIH_FRC:
+        case VKD3DSIH_FREM:
+        case VKD3DSIH_FTOD:
+        case VKD3DSIH_FTOI:
+        case VKD3DSIH_FTOU:
+        case VKD3DSIH_GEO:
+        case VKD3DSIH_GEU:
+        case VKD3DSIH_HCOS:
+        case VKD3DSIH_HSIN:
+        case VKD3DSIH_HTAN:
+        case VKD3DSIH_IADD:
+        case VKD3DSIH_IBFE:
+        case VKD3DSIH_IDIV:
+        case VKD3DSIH_IEQ:
+        case VKD3DSIH_IGE:
+        case VKD3DSIH_ILT:
+        case VKD3DSIH_IMAD:
+        case VKD3DSIH_IMAX:
+        case VKD3DSIH_IMIN:
+        case VKD3DSIH_IMUL:
+        case VKD3DSIH_INE:
+        case VKD3DSIH_INEG:
+        case VKD3DSIH_ISFINITE:
+        case VKD3DSIH_ISHL:
+        case VKD3DSIH_ISHR:
+        case VKD3DSIH_ISINF:
+        case VKD3DSIH_ISNAN:
+        case VKD3DSIH_ITOD:
+        case VKD3DSIH_ITOF:
+        case VKD3DSIH_ITOI:
+        case VKD3DSIH_LOG:
+        case VKD3DSIH_LOGP:
+        case VKD3DSIH_LRP:
+        case VKD3DSIH_LTO:
+        case VKD3DSIH_LTU:
+        case VKD3DSIH_MAD:
+        case VKD3DSIH_MAX:
+        case VKD3DSIH_MIN:
+        case VKD3DSIH_MOV:
+        case VKD3DSIH_MOVA:
+        case VKD3DSIH_MOVC:
+        case VKD3DSIH_MSAD: /* FIXME: Is this correct? */
+        case VKD3DSIH_MUL:
+        case VKD3DSIH_NEO:
+        case VKD3DSIH_NEU:
+        case VKD3DSIH_NOT:
+        case VKD3DSIH_OR:
+        case VKD3DSIH_ORD:
+        case VKD3DSIH_PHI:
+        case VKD3DSIH_POW:
+        case VKD3DSIH_QUAD_READ_ACROSS_D:
+        case VKD3DSIH_QUAD_READ_ACROSS_X:
+        case VKD3DSIH_QUAD_READ_ACROSS_Y:
+        case VKD3DSIH_RCP:
+        case VKD3DSIH_ROUND_NE:
+        case VKD3DSIH_ROUND_NI:
+        case VKD3DSIH_ROUND_PI:
+        case VKD3DSIH_ROUND_Z:
+        case VKD3DSIH_RSQ:
+        case VKD3DSIH_SETP:
+        case VKD3DSIH_SGE:
+        case VKD3DSIH_SGN:
+        case VKD3DSIH_SIN:
+        case VKD3DSIH_SINCOS: /* FIXME: Only for sm4. */
+        case VKD3DSIH_SLT:
+        case VKD3DSIH_SQRT:
+        case VKD3DSIH_SUB:
+        case VKD3DSIH_SWAPC:
+        case VKD3DSIH_TAN:
+        case VKD3DSIH_UBFE:
+        case VKD3DSIH_UDIV:
+        case VKD3DSIH_UGE:
+        case VKD3DSIH_ULT:
+        case VKD3DSIH_UMAX:
+        case VKD3DSIH_UMIN:
+        case VKD3DSIH_UMUL:
+        case VKD3DSIH_UNO:
+        case VKD3DSIH_USHR:
+        case VKD3DSIH_UTOD:
+        case VKD3DSIH_UTOF:
+        case VKD3DSIH_UTOU:
+        case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL:
+        case VKD3DSIH_WAVE_ACTIVE_BIT_AND:
+        case VKD3DSIH_WAVE_ACTIVE_BIT_OR:
+        case VKD3DSIH_WAVE_ACTIVE_BIT_XOR:
+        case VKD3DSIH_WAVE_ALL_TRUE:
+        case VKD3DSIH_WAVE_ANY_TRUE:
+        case VKD3DSIH_WAVE_OP_ADD:
+        case VKD3DSIH_WAVE_OP_IMAX:
+        case VKD3DSIH_WAVE_OP_IMIN:
+        case VKD3DSIH_WAVE_OP_MAX:
+        case VKD3DSIH_WAVE_OP_MIN:
+        case VKD3DSIH_WAVE_OP_MUL:
+        case VKD3DSIH_WAVE_OP_UMAX:
+        case VKD3DSIH_WAVE_OP_UMIN:
+        case VKD3DSIH_WAVE_READ_LANE_FIRST:
+        case VKD3DSIH_XOR:
+            return true;
+
+        /* Atomics can't have a writemask. */
+        case VKD3DSIH_ATOMIC_AND:
+        case VKD3DSIH_ATOMIC_CMP_STORE:
+        case VKD3DSIH_ATOMIC_IADD:
+        case VKD3DSIH_ATOMIC_IMAX:
+        case VKD3DSIH_ATOMIC_IMIN:
+        case VKD3DSIH_ATOMIC_OR:
+        case VKD3DSIH_ATOMIC_UMAX:
+        case VKD3DSIH_ATOMIC_UMIN:
+        case VKD3DSIH_ATOMIC_XOR:
+        case VKD3DSIH_BEM:
+        case VKD3DSIH_BRANCH:
+        case VKD3DSIH_BREAK:
+        case VKD3DSIH_BREAKC:
+        case VKD3DSIH_BREAKP:
+        case VKD3DSIH_BUFINFO:
+        case VKD3DSIH_CALL:
+        case VKD3DSIH_CALLNZ:
+        case VKD3DSIH_CASE:
+        case VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED: /* FIXME: Is this correct? */
+        case VKD3DSIH_CONTINUE:
+        case VKD3DSIH_CONTINUEP:
+        case VKD3DSIH_CRS:
+        case VKD3DSIH_CUT:
+        case VKD3DSIH_CUT_STREAM:
+        case VKD3DSIH_DCL:
+        case VKD3DSIH_DCL_CONSTANT_BUFFER:
+        case VKD3DSIH_DCL_FUNCTION_BODY:
+        case VKD3DSIH_DCL_FUNCTION_TABLE:
+        case VKD3DSIH_DCL_GLOBAL_FLAGS:
+        case VKD3DSIH_DCL_GS_INSTANCES:
+        case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+        case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+        case VKD3DSIH_DCL_HS_MAX_TESSFACTOR:
+        case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER:
+        case VKD3DSIH_DCL_INDEXABLE_TEMP:
+        case VKD3DSIH_DCL_INDEX_RANGE:
+        case VKD3DSIH_DCL_INPUT:
+        case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT:
+        case VKD3DSIH_DCL_INPUT_PRIMITIVE:
+        case VKD3DSIH_DCL_INPUT_PS:
+        case VKD3DSIH_DCL_INPUT_PS_SGV:
+        case VKD3DSIH_DCL_INPUT_PS_SIV:
+        case VKD3DSIH_DCL_INPUT_SGV:
+        case VKD3DSIH_DCL_INPUT_SIV:
+        case VKD3DSIH_DCL_INTERFACE:
+        case VKD3DSIH_DCL_OUTPUT:
+        case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT:
+        case VKD3DSIH_DCL_OUTPUT_SGV:
+        case VKD3DSIH_DCL_OUTPUT_SIV:
+        case VKD3DSIH_DCL_OUTPUT_TOPOLOGY:
+        case VKD3DSIH_DCL_RESOURCE_RAW:
+        case VKD3DSIH_DCL_RESOURCE_STRUCTURED:
+        case VKD3DSIH_DCL_SAMPLER:
+        case VKD3DSIH_DCL_STREAM:
+        case VKD3DSIH_DCL_TEMPS:
+        case VKD3DSIH_DCL_TESSELLATOR_DOMAIN:
+        case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE:
+        case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING:
+        case VKD3DSIH_DCL_TGSM_RAW:
+        case VKD3DSIH_DCL_TGSM_STRUCTURED:
+        case VKD3DSIH_DCL_THREAD_GROUP:
+        case VKD3DSIH_DCL_UAV_RAW:
+        case VKD3DSIH_DCL_UAV_STRUCTURED:
+        case VKD3DSIH_DCL_UAV_TYPED:
+        case VKD3DSIH_DCL_VERTICES_OUT:
+        case VKD3DSIH_DEF:
+        case VKD3DSIH_DEFAULT:
+        case VKD3DSIH_DEFB:
+        case VKD3DSIH_DEFI:
+        case VKD3DSIH_DEQO:
+        case VKD3DSIH_DGEO:
+        case VKD3DSIH_DISCARD:
+        case VKD3DSIH_DLT:
+        case VKD3DSIH_DNE:
+        case VKD3DSIH_DP2:
+        case VKD3DSIH_DP2ADD:
+        case VKD3DSIH_DP3:
+        case VKD3DSIH_DP4:
+        case VKD3DSIH_DST:
+        case VKD3DSIH_DTOF:
+        case VKD3DSIH_DTOI:
+        case VKD3DSIH_DTOU:
+        case VKD3DSIH_ELSE:
+        case VKD3DSIH_EMIT:
+        case VKD3DSIH_EMIT_STREAM:
+        case VKD3DSIH_ENDIF:
+        case VKD3DSIH_ENDLOOP:
+        case VKD3DSIH_ENDREP:
+        case VKD3DSIH_ENDSWITCH:
+        case VKD3DSIH_FCALL:
+        case VKD3DSIH_HS_CONTROL_POINT_PHASE:
+        case VKD3DSIH_HS_DECLS:
+        case VKD3DSIH_HS_FORK_PHASE:
+        case VKD3DSIH_HS_JOIN_PHASE:
+        case VKD3DSIH_IF:
+        case VKD3DSIH_IFC:
+        /* It's unclear if any mapping is done for the source value.
+         * Does it require replicate swizzle? */
+        case VKD3DSIH_IMM_ATOMIC_ALLOC:
+        case VKD3DSIH_IMM_ATOMIC_AND:
+        case VKD3DSIH_IMM_ATOMIC_CMP_EXCH:
+        case VKD3DSIH_IMM_ATOMIC_CONSUME:
+        case VKD3DSIH_IMM_ATOMIC_EXCH:
+        case VKD3DSIH_IMM_ATOMIC_IADD:
+        case VKD3DSIH_IMM_ATOMIC_IMAX:
+        case VKD3DSIH_IMM_ATOMIC_IMIN:
+        case VKD3DSIH_IMM_ATOMIC_OR:
+        case VKD3DSIH_IMM_ATOMIC_UMAX:
+        case VKD3DSIH_IMM_ATOMIC_UMIN:
+        case VKD3DSIH_IMM_ATOMIC_XOR:
+        case VKD3DSIH_LABEL:
+        case VKD3DSIH_LOOP:
+        case VKD3DSIH_LIT:
+        case VKD3DSIH_M3x2:
+        case VKD3DSIH_M3x3:
+        case VKD3DSIH_M3x4:
+        case VKD3DSIH_M4x3:
+        case VKD3DSIH_M4x4:
+        case VKD3DSIH_NOP:
+        /* NRM writemask must be .xyz or .xyzw. */
+        case VKD3DSIH_NRM:
+        case VKD3DSIH_PHASE:
+        case VKD3DSIH_REP:
+        case VKD3DSIH_RET:
+        case VKD3DSIH_RETP:
+        /* Store instructions always require a trivial writemask. */
+        case VKD3DSIH_STORE_RAW:
+        case VKD3DSIH_STORE_STRUCTURED:
+        case VKD3DSIH_STORE_UAV_TYPED:
+        case VKD3DSIH_SWITCH:
+        case VKD3DSIH_SWITCH_MONOLITHIC:
+        case VKD3DSIH_SYNC:
+        case VKD3DSIH_TEX:
+        case VKD3DSIH_TEXBEM:
+        case VKD3DSIH_TEXBEML:
+        case VKD3DSIH_TEXCOORD:
+        case VKD3DSIH_TEXCRD:
+        case VKD3DSIH_TEXDEPTH:
+        case VKD3DSIH_TEXDP3:
+        case VKD3DSIH_TEXDP3TEX:
+        case VKD3DSIH_TEXKILL:
+        case VKD3DSIH_TEXLD:
+        case VKD3DSIH_TEXLDD:
+        case VKD3DSIH_TEXLDL:
+        case VKD3DSIH_TEXM3x2DEPTH:
+        case VKD3DSIH_TEXM3x2PAD:
+        case VKD3DSIH_TEXM3x2TEX:
+        case VKD3DSIH_TEXM3x3:
+        case VKD3DSIH_TEXM3x3DIFF:
+        case VKD3DSIH_TEXM3x3PAD:
+        case VKD3DSIH_TEXM3x3SPEC:
+        case VKD3DSIH_TEXM3x3TEX:
+        case VKD3DSIH_TEXM3x3VSPEC:
+        case VKD3DSIH_TEXREG2AR:
+        case VKD3DSIH_TEXREG2GB:
+        case VKD3DSIH_TEXREG2RGB:
+        case VKD3DSIH_WAVE_ACTIVE_BALLOT:
+        case VKD3DSIH_WAVE_ALL_BIT_COUNT:
+        case VKD3DSIH_WAVE_IS_FIRST_LANE:
+        case VKD3DSIH_WAVE_PREFIX_BIT_COUNT:
+            return false;
+
+        case VKD3DSIH_QUAD_READ_LANE_AT:
+        case VKD3DSIH_WAVE_READ_LANE_AT:
+            return (src_idx == 0);
+
+        /* sm4 resource instructions are an odd case, since they're not actually
+         * per-component. However, the "swizzle" placed on the resource allows
+         * arbitrary destination writemasks to be used.
+         *
+         * This means that for the purposes of the "remapping" done by
+         * temp_allocator_set_dst(), we can basically treat those sources as
+         * "masked", altering them when we reassign the destination writemask. */
+
+        /* FIXME: The documentation seems to say that these instructions behave
+         * this way, but is it correct?
+         * (It's silent about EVAL_*, but presumably they behave the same way.) */
+        case VKD3DSIH_EVAL_CENTROID:
+        case VKD3DSIH_EVAL_SAMPLE_INDEX:
+        case VKD3DSIH_SAMPLE_INFO:
+        case VKD3DSIH_SAMPLE_POS:
+            return (src_idx == 0);
+        case VKD3DSIH_GATHER4:
+        case VKD3DSIH_GATHER4_C:
+        case VKD3DSIH_GATHER4_C_S:
+        case VKD3DSIH_GATHER4_S:
+        case VKD3DSIH_LD:
+        case VKD3DSIH_LD2DMS:
+        case VKD3DSIH_LD2DMS_S:
+        case VKD3DSIH_LD_RAW:
+        case VKD3DSIH_LD_RAW_S:
+        case VKD3DSIH_LD_S:
+        case VKD3DSIH_LD_UAV_TYPED:
+        case VKD3DSIH_LD_UAV_TYPED_S:
+        case VKD3DSIH_LOD:
+        case VKD3DSIH_RESINFO:
+        case VKD3DSIH_SAMPLE:
+        case VKD3DSIH_SAMPLE_B:
+        case VKD3DSIH_SAMPLE_B_CL_S:
+        case VKD3DSIH_SAMPLE_C:
+        case VKD3DSIH_SAMPLE_CL_S:
+        case VKD3DSIH_SAMPLE_C_CL_S:
+        case VKD3DSIH_SAMPLE_C_LZ:
+        case VKD3DSIH_SAMPLE_C_LZ_S:
+        case VKD3DSIH_SAMPLE_GRAD:
+        case VKD3DSIH_SAMPLE_GRAD_CL_S:
+        case VKD3DSIH_SAMPLE_LOD:
+        case VKD3DSIH_SAMPLE_LOD_S:
+            return (src_idx == 1);
+        case VKD3DSIH_GATHER4_PO:
+        case VKD3DSIH_GATHER4_PO_C:
+        case VKD3DSIH_GATHER4_PO_C_S:
+        case VKD3DSIH_GATHER4_PO_S:
+        case VKD3DSIH_LD_STRUCTURED:
+        case VKD3DSIH_LD_STRUCTURED_S:
+            return (src_idx == 2);
+
+        case VKD3DSIH_INVALID:
+        case VKD3DSIH_COUNT:
+            break;
+    }
+
+    vkd3d_unreachable();
+}
+
+struct liveness_tracker
+{
+    struct liveness_tracker_reg
+    {
+        bool written;
+        bool fixed_mask;
+        uint8_t mask;
+        unsigned int first_write, last_access;
+    } *ssa_regs;
+};
+
+static void liveness_track_src(struct liveness_tracker *tracker,
+        struct vkd3d_shader_src_param *src, unsigned int index)
+{
+    for (unsigned int k = 0; k < src->reg.idx_count; ++k)
+    {
+        if (src->reg.idx[k].rel_addr)
+            liveness_track_src(tracker, src->reg.idx[k].rel_addr, index);
+    }
+
+    if (src->reg.type == VKD3DSPR_SSA)
+        tracker->ssa_regs[src->reg.idx[0].offset].last_access = index;
+}
+
+static void liveness_track_dst(struct liveness_tracker *tracker, struct vkd3d_shader_dst_param *dst,
+        unsigned int index, const struct vkd3d_shader_version *version, enum vkd3d_shader_opcode opcode)
+{
+    struct liveness_tracker_reg *reg;
+
+    for (unsigned int k = 0; k < dst->reg.idx_count; ++k)
+    {
+        if (dst->reg.idx[k].rel_addr)
+            liveness_track_src(tracker, dst->reg.idx[k].rel_addr, index);
+    }
+
+    if (dst->reg.type == VKD3DSPR_SSA)
+        reg = &tracker->ssa_regs[dst->reg.idx[0].offset];
+    else
+        return;
+
+    if (!reg->written)
+        reg->first_write = index;
+    reg->last_access = index;
+    reg->written = true;
+    reg->mask |= dst->write_mask;
+
+    switch (opcode)
+    {
+        case VKD3DSIH_BEM:
+        case VKD3DSIH_CRS:
+        case VKD3DSIH_DST:
+        case VKD3DSIH_LIT:
+        case VKD3DSIH_M3x2:
+        case VKD3DSIH_M3x3:
+        case VKD3DSIH_M3x4:
+        case VKD3DSIH_M4x3:
+        case VKD3DSIH_M4x4:
+        case VKD3DSIH_NRM:
+        case VKD3DSIH_TEX:
+        case VKD3DSIH_TEXBEM:
+        case VKD3DSIH_TEXBEML:
+        case VKD3DSIH_TEXCOORD:
+        case VKD3DSIH_TEXCRD:
+        case VKD3DSIH_TEXDEPTH:
+        case VKD3DSIH_TEXDP3:
+        case VKD3DSIH_TEXDP3TEX:
+        case VKD3DSIH_TEXLD:
+        case VKD3DSIH_TEXLDD:
+        case VKD3DSIH_TEXLDL:
+        case VKD3DSIH_TEXM3x2DEPTH:
+        case VKD3DSIH_TEXM3x2PAD:
+        case VKD3DSIH_TEXM3x2TEX:
+        case VKD3DSIH_TEXM3x3:
+        case VKD3DSIH_TEXM3x3DIFF:
+        case VKD3DSIH_TEXM3x3PAD:
+        case VKD3DSIH_TEXM3x3SPEC:
+        case VKD3DSIH_TEXM3x3TEX:
+        case VKD3DSIH_TEXM3x3VSPEC:
+        case VKD3DSIH_TEXREG2AR:
+        case VKD3DSIH_TEXREG2GB:
+        case VKD3DSIH_TEXREG2RGB:
+            /* All of these instructions have fixed destinations—they can
+             * in some cases be masked, but the destination cannot be
+             * reallocated to a different set of components. */
+        case VKD3DSIH_IDIV:
+        case VKD3DSIH_IMUL:
+        case VKD3DSIH_SWAPC:
+        case VKD3DSIH_UDIV:
+        case VKD3DSIH_UMUL:
+            /* These instructions don't have fixed destinations, but they have
+             * multiple destinations and are per-component, meaning that the
+             * destination masks for each component have to match.
+             * This is a bit tricky to pull off, so for now we just force
+             * these to have a fixed mask as well.
+             * This assumes that the destination masks are equal to each other
+             * to begin with! */
+            reg->fixed_mask = true;
+            break;
+
+        case VKD3DSIH_SINCOS:
+            /* sm1 has a fixed destination like LIT, NRM.
+             * sm4 is two-component and masked, like IMUL. */
+            if (version->major < 3)
+            {
+                /* We have the additional constraint here that sincos scratches
+                 * whichever components of .xyz it doesn't write. We can achieve
+                 * this by simply adding those components to reg->mask. */
+                reg->mask |= 0x7;
+            }
+            reg->fixed_mask = true;
+            break;
+
+        default:
+            break;
+    }
+}
+
+static void liveness_tracker_cleanup(struct liveness_tracker *tracker)
+{
+    vkd3d_free(tracker->ssa_regs);
+}
+
+static enum vkd3d_result track_liveness(struct vsir_program *program, struct liveness_tracker *tracker)
+{
+    struct liveness_tracker_reg *regs;
+    unsigned int loop_depth = 0;
+    unsigned int loop_start = 0;
+
+    memset(tracker, 0, sizeof(*tracker));
+
+    if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs))))
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    tracker->ssa_regs = regs;
+
+    for (unsigned int i = 0; i < program->instructions.count; ++i)
+    {
+        const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+        if (ins->opcode == VKD3DSIH_LOOP || ins->opcode == VKD3DSIH_REP)
+        {
+            if (!loop_depth++)
+                loop_start = i;
+        }
+        else if (ins->opcode == VKD3DSIH_ENDLOOP || ins->opcode == VKD3DSIH_ENDREP)
+        {
+            if (!--loop_depth)
+            {
+                /* Go through the tracker and extend the liveness of every SSA
+                 * register to the whole range of the loop. NOTE(review): this
+                 * does not check whether the loop actually touched the register.
+                 * This isn't very sophisticated (e.g. we could try to avoid
+                 * this for registers first written inside a loop body and only
+                 * ever read inside one), but many of the cases that matter are
+                 * affected by other optimizations such as copy propagation
+                 * anyway.
+                 *
+                 * This is overkill for SSA registers. If an SSA register is
+                 * written in loop L and last read in L, we don't need to touch
+                 * its liveness. If it's last read in an inferior loop of L, we
+                 * only need to extend its last-read to the end of L. (And it
+                 * should be illegal for an SSA value to be read in a block
+                 * containing L.)
+                 * We don't try to perform this optimization yet, in the name of
+                 * maximal simplicity, and also because this code is intended to
+                 * be extended to non-SSA values. */
+                for (unsigned int j = 0; j < program->ssa_count; ++j)
+                {
+                    struct liveness_tracker_reg *reg = &tracker->ssa_regs[j];
+
+                    if (reg->first_write > loop_start)
+                        reg->first_write = loop_start;
+                    if (reg->last_access < i)
+                        reg->last_access = i;
+                }
+            }
+        }
+
+        for (unsigned int j = 0; j < ins->dst_count; ++j)
+            liveness_track_dst(tracker, &ins->dst[j], i, &program->shader_version, ins->opcode);
+        for (unsigned int j = 0; j < ins->src_count; ++j)
+            liveness_track_src(tracker, &ins->src[j], i);
+    }
+
+    return VKD3D_OK;
+}
+
+struct temp_allocator
+{
+    struct vkd3d_shader_message_context *message_context;
+    struct temp_allocator_reg
+    {
+        uint8_t allocated_mask;
+        uint32_t temp_id;
+    } *ssa_regs;
+    size_t allocated_ssa_count;
+    enum vkd3d_result result;
+};
+
+static uint8_t get_available_writemask(const struct temp_allocator *allocator,
+        struct liveness_tracker *tracker, unsigned int first_write, unsigned int last_access, uint32_t temp_id)
+{
+    uint8_t writemask = VKD3DSP_WRITEMASK_ALL;
+
+    for (size_t i = 0; i < allocator->allocated_ssa_count; ++i)
+    {
+        const struct temp_allocator_reg *reg = &allocator->ssa_regs[i];
+        const struct liveness_tracker_reg *liveness_reg = &tracker->ssa_regs[i];
+
+        /* We do not overlap if first write == last read:
+         * this is the case where we are allocating the result of that
+         * expression, e.g. "add r0, r0, r1". */
+
+        if (reg->temp_id == temp_id
+                && first_write < liveness_reg->last_access
+                && last_access > liveness_reg->first_write)
+            writemask &= ~reg->allocated_mask;
+
+        if (!writemask)
+            return writemask;
+    }
+
+    return writemask;
+}
+
+static void temp_allocator_allocate(struct temp_allocator *allocator, struct liveness_tracker *tracker,
+        struct temp_allocator_reg *reg, const struct liveness_tracker_reg *liveness_reg, uint32_t base_id)
+{
+    if (!liveness_reg->written)
+        return;
+
+    for (uint32_t id = base_id;; ++id)
+    {
+        uint8_t available_mask = get_available_writemask(allocator, tracker,
+                liveness_reg->first_write, liveness_reg->last_access, id);
+
+        if (liveness_reg->fixed_mask)
+        {
+            if ((available_mask & liveness_reg->mask) == liveness_reg->mask)
+            {
+                reg->temp_id = id;
+                reg->allocated_mask = liveness_reg->mask;
+                return;
+            }
+        }
+        else
+        {
+            /* For SSA values the mask is always zero-based and contiguous.
+             * We don't correctly handle cases where it's not, currently. */
+            VKD3D_ASSERT((liveness_reg->mask | (liveness_reg->mask - 1)) == liveness_reg->mask);
+
+            if (vkd3d_popcount(available_mask) >= vkd3d_popcount(liveness_reg->mask))
+            {
+                reg->temp_id = id;
+                reg->allocated_mask = vsir_combine_write_masks(available_mask, liveness_reg->mask);
+                return;
+            }
+        }
+    }
+}
+
+static void temp_allocator_set_src(struct temp_allocator *allocator, struct vkd3d_shader_src_param *src)
+{
+    struct temp_allocator_reg *reg;
+
+    for (unsigned int k = 0; k < src->reg.idx_count; ++k)
+    {
+        if (src->reg.idx[k].rel_addr)
+            temp_allocator_set_src(allocator, src->reg.idx[k].rel_addr);
+    }
+
+    if (src->reg.type == VKD3DSPR_SSA)
+        reg = &allocator->ssa_regs[src->reg.idx[0].offset];
+    else
+        return;
+
+    src->reg.type = VKD3DSPR_TEMP;
+    src->reg.idx[0].offset = reg->temp_id;
+    src->swizzle = vsir_combine_swizzles(vsir_swizzle_from_writemask(reg->allocated_mask), src->swizzle);
+}
+
+static uint32_t vsir_map_swizzle(uint32_t swizzle, unsigned int writemask)
+{
+    unsigned int next = 0, i;
+    uint32_t mapped = 0;
+
+    /* Replicate swizzles are returned unchanged; some instructions need them. */
+    if (swizzle == VKD3D_SHADER_SWIZZLE(X, X, X, X) || swizzle == VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)
+            || swizzle == VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z) || swizzle == VKD3D_SHADER_SWIZZLE(W, W, W, W))
+        return swizzle;
+
+    /* Give the leading swizzle components, in order, to the components set in the write mask. */
+    for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
+    {
+        if (!(writemask & (1u << i)))
+            continue;
+        vsir_swizzle_set_component(&mapped, i, vsir_swizzle_get_component(swizzle, next++));
+    }
+    return mapped;
+}
+
+static void vsir_remap_immconst(struct vkd3d_shader_src_param *src, unsigned int writemask)
+{
+    /* Work from a copy, so that values are read before their slots are overwritten. */
+    const union vsir_immediate_constant original = src->reg.u;
+    unsigned int taken = 0, slot;
+
+    /* Move the leading values, in order, into the slots set in the write mask. */
+    for (slot = 0; slot < VKD3D_VEC4_SIZE; ++slot)
+        if ((writemask >> slot) & 1u)
+            src->reg.u.immconst_u32[slot] = original.immconst_u32[taken++];
+}
+
+static void vsir_remap_immconst64(struct vkd3d_shader_src_param *src, unsigned int writemask)
+{
+    if (writemask == (VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3)) /* Any other mask leaves the constant untouched. */
+        src->reg.u.immconst_u64[1] = src->reg.u.immconst_u64[0]; /* Copy the first 64-bit value to slot 1. */
+}
+
+static bool vsir_opcode_is_double(enum vkd3d_shader_opcode opcode)
+{
+    /* The opcodes treated as double operations. temp_allocator_set_dst()
+     * raises a "not implemented" error when the destination write mask of
+     * one of these has to change. */
+    static const enum vkd3d_shader_opcode double_opcodes[] =
+    {
+        VKD3DSIH_DADD,  VKD3DSIH_DDIV,
+        VKD3DSIH_DFMA,  VKD3DSIH_DMAX,
+        VKD3DSIH_DMIN,  VKD3DSIH_DMOV,
+        VKD3DSIH_DMOVC, VKD3DSIH_DMUL,
+        VKD3DSIH_DRCP,  VKD3DSIH_DEQO,
+        VKD3DSIH_DGEO,  VKD3DSIH_DLT,
+        VKD3DSIH_DNE,   VKD3DSIH_DTOF,
+        VKD3DSIH_DTOI,  VKD3DSIH_DTOU,
+        VKD3DSIH_FTOD,
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof(double_opcodes) / sizeof(*double_opcodes); ++i)
+    {
+        if (double_opcodes[i] == opcode)
+            return true;
+    }
+
+    return false;
+}
+
+static void temp_allocator_set_dst(struct temp_allocator *allocator,
+        struct vkd3d_shader_dst_param *dst, const struct vkd3d_shader_instruction *ins)
+{
+    const struct temp_allocator_reg *assigned;
+    struct vkd3d_shader_src_param *operand;
+    unsigned int n;
+
+    for (n = 0; n < dst->reg.idx_count; ++n)
+    {
+        if (dst->reg.idx[n].rel_addr)
+            temp_allocator_set_src(allocator, dst->reg.idx[n].rel_addr);
+    }
+
+    if (dst->reg.type != VKD3DSPR_SSA)
+        return;
+
+    assigned = &allocator->ssa_regs[dst->reg.idx[0].offset];
+    dst->reg.type = VKD3DSPR_TEMP;
+    dst->reg.idx[0].offset = assigned->temp_id;
+    if (dst->write_mask == assigned->allocated_mask)
+        return;
+
+    /* The allocated components differ from the ones written; move the write mask and the masked sources. */
+    dst->write_mask = assigned->allocated_mask;
+
+    if (vsir_opcode_is_double(ins->opcode))
+    {
+        vkd3d_shader_error(allocator->message_context, &ins->location,
+                VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Doubles are not currently handled.");
+        allocator->result = VKD3D_ERROR_NOT_IMPLEMENTED;
+    }
+
+    for (n = 0; n < ins->src_count; ++n)
+    {
+        if (!vsir_src_is_masked(ins->opcode, n))
+            continue;
+        operand = &ins->src[n];
+        if (operand->reg.type == VKD3DSPR_IMMCONST)
+            vsir_remap_immconst(operand, dst->write_mask);
+        else if (operand->reg.type == VKD3DSPR_IMMCONST64)
+            vsir_remap_immconst64(operand, dst->write_mask);
+        else
+            operand->swizzle = vsir_map_swizzle(operand->swizzle, dst->write_mask);
+    }
+}
+
+enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program,
+        struct vkd3d_shader_message_context *message_context)
+{ /* Replaces every SSA register in the program with a TEMP register, and updates the temp counts to match. */
+    struct temp_allocator allocator = {0};
+    struct temp_allocator_reg *regs;
+    struct liveness_tracker tracker;
+    uint32_t temp_count = 0;
+    enum vkd3d_result ret;
+
+    if (!program->ssa_count)
+        return VKD3D_OK; /* Nothing to allocate. */
+
+    if ((ret = track_liveness(program, &tracker)))
+        return ret;
+
+    if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs))))
+    {
+        liveness_tracker_cleanup(&tracker);
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+    allocator.message_context = message_context;
+    allocator.ssa_regs = regs;
+
+    for (unsigned int i = 0; i < program->ssa_count; ++i) /* Pick a temp and component mask per SSA value. */
+    {
+        const struct liveness_tracker_reg *liveness_reg = &tracker.ssa_regs[i];
+        struct temp_allocator_reg *reg = &allocator.ssa_regs[i];
+
+        temp_allocator_allocate(&allocator, &tracker, reg, liveness_reg, program->temp_count);
+        TRACE("Allocated r%u%s to sr%u (liveness %u-%u).\n",
+                reg->temp_id, debug_vsir_writemask(reg->allocated_mask), i,
+                liveness_reg->first_write, liveness_reg->last_access);
+        ++allocator.allocated_ssa_count;
+    }
+
+    for (unsigned int i = 0; i < program->instructions.count; ++i) /* Rewrite the SSA references. */
+    {
+        const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+        /* Make sure we do the srcs first; setting the dst writemask may need
+         * to remap their swizzles. */
+        for (unsigned int j = 0; j < ins->src_count; ++j)
+            temp_allocator_set_src(&allocator, &ins->src[j]);
+        for (unsigned int j = 0; j < ins->dst_count; ++j)
+            temp_allocator_set_dst(&allocator, &ins->dst[j], ins);
+    }
+
+    /* Rewrite dcl_temps to reflect the new temp count.
+     * Note that dcl_temps appears once per phase, and should reflect only the
+     * number of temps needed by that phase.
+     * Therefore we iterate backwards through the shader, finding the maximum
+     * register used by any instruction, update the dcl_temps at the beginning
+     * of each phase, and then reset the temp count back to 0 for the next
+     * phase (if any). */
+    for (int i = program->instructions.count - 1; i >= 0; --i)
+    {
+        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+        if (ins->opcode == VKD3DSIH_DCL_TEMPS)
+        {
+            ins->declaration.count = temp_count;
+            temp_count = 0;
+            continue;
+        }
+        if (temp_count && program->shader_version.major >= 4
+                && (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE
+                        || ins->opcode == VKD3DSIH_HS_FORK_PHASE
+                        || ins->opcode == VKD3DSIH_HS_JOIN_PHASE))
+        {
+            /* The phase didn't have a dcl_temps instruction, but we added
+             * temps here, so we need to insert one. */
+            if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1))
+            {
+                vkd3d_free(regs);
+                liveness_tracker_cleanup(&tracker);
+                return VKD3D_ERROR_OUT_OF_MEMORY;
+            }
+
+            ins = &program->instructions.elements[i + 1]; /* The new slot, directly after the phase instruction. */
+            vsir_instruction_init(ins, &program->instructions.elements[i].location, VKD3DSIH_DCL_TEMPS);
+            ins->declaration.count = temp_count;
+            temp_count = 0;
+            continue;
+        }
+
+        /* No need to check sources. If we've produced an unwritten source then
+         * that's a bug somewhere in this pass. */
+        for (unsigned int j = 0; j < ins->dst_count; ++j)
+        {
+            if (ins->dst[j].reg.type == VKD3DSPR_TEMP)
+            {
+                temp_count = max(temp_count, ins->dst[j].reg.idx[0].offset + 1);
+                program->temp_count = max(program->temp_count, temp_count);
+            }
+        }
+    }
+
+    if (temp_count && program->shader_version.major >= 4) /* Temps were seen, but no dcl_temps preceded them. */
+    {
+        struct vkd3d_shader_instruction *ins;
+
+        if (!shader_instruction_array_insert_at(&program->instructions, 0, 1))
+        {
+            vkd3d_free(regs);
+            liveness_tracker_cleanup(&tracker);
+            return VKD3D_ERROR_OUT_OF_MEMORY;
+        }
+
+        ins = &program->instructions.elements[0];
+        vsir_instruction_init(ins, &program->instructions.elements[1].location, VKD3DSIH_DCL_TEMPS);
+        ins->declaration.count = temp_count;
+    }
+
+    program->ssa_count = 0; /* The SSA references were rewritten to temps above. */
+
+    vkd3d_free(regs);
+    liveness_tracker_cleanup(&tracker);
+    return allocator.result; /* Zero-initialised; set to an error code by temp_allocator_set_dst(). */
+}
+
 struct validation_context
 {
     struct vkd3d_shader_message_context *message_context;
@@ -8280,10 +9213,6 @@ static void vsir_validate_descriptor_indices(struct validation_context *ctx,
 static void vsir_validate_constbuffer_register(struct validation_context *ctx,
         const struct vkd3d_shader_register *reg)
 {
-    if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
-        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION,
-                "Invalid precision %#x for a CONSTBUFFER register.", reg->precision);
-
     if (reg->dimension != VSIR_DIMENSION_VEC4)
         validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
                 "Invalid dimension %#x for a CONSTBUFFER register.", reg->dimension);
@@ -8795,6 +9724,11 @@ static void vsir_validate_src_param(struct validation_context *ctx,
         [VKD3DSPSM_BIASNEG] = {F32_BIT},
         [VKD3DSPSM_SIGN]    = {F32_BIT},
         [VKD3DSPSM_SIGNNEG] = {F32_BIT},
+        [VKD3DSPSM_COMP]    = {F32_BIT},
+        [VKD3DSPSM_X2]      = {F32_BIT},
+        [VKD3DSPSM_X2NEG]   = {F32_BIT},
+        [VKD3DSPSM_DZ]      = {F32_BIT},
+        [VKD3DSPSM_DW]      = {F32_BIT},
     };
     vsir_validate_register(ctx, &src->reg);
 
@@ -9455,6 +10389,17 @@ static void vsir_validate_elementwise_operation(struct validation_context *ctx,
     }
 }
 
+static void vsir_validate_double_elementwise_operation(struct validation_context *ctx,
+        const struct vkd3d_shader_instruction *instruction)
+{
+    /* Type table handed to the shared elementwise validator. DOUBLE is
+     * the only entry set; every other entry is implicitly false because
+     * the array has static storage. */
+    static const bool double_only[VKD3D_DATA_COUNT] = {[VKD3D_DATA_DOUBLE] = true};
+
+    vsir_validate_elementwise_operation(ctx, instruction, double_only);
+}
+
 static void vsir_validate_float_elementwise_operation(struct validation_context *ctx,
         const struct vkd3d_shader_instruction *instruction)
 {
@@ -9479,6 +10424,52 @@ static void vsir_validate_logic_elementwise_operation(struct validation_context
     vsir_validate_elementwise_operation(ctx, instruction, types);
 }
 
+/* Validate a comparison: UINT or BOOL result, source type permitted by "types", all sources of one type. */
+static void vsir_validate_comparison_operation(struct validation_context *ctx,
+        const struct vkd3d_shader_instruction *instruction, const bool types[VKD3D_DATA_COUNT])
+{
+    enum vkd3d_data_type dst_data_type, src_data_type;
+    unsigned int i;
+
+    if (instruction->dst_count < 1)
+        return;
+
+    dst_data_type = instruction->dst[0].reg.data_type;
+    if (dst_data_type != VKD3D_DATA_UINT && dst_data_type != VKD3D_DATA_BOOL)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
+                "Invalid data type %#x for result of comparison operation \"%s\" (%#x).",
+                dst_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
+
+    /* Out-of-range values must never be used to index "types". */
+    if (instruction->src_count < 1 || (src_data_type = instruction->src[0].reg.data_type) >= VKD3D_DATA_COUNT)
+        return;
+    if (!types[src_data_type])
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
+                "Invalid data type %#x for comparison operation \"%s\" (%#x).",
+                src_data_type, vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
+
+    for (i = 1; i < instruction->src_count; ++i)
+    {
+        if (instruction->src[i].reg.data_type != src_data_type)
+            validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE,
+                    "Data type %#x for operand %u doesn't match the first operands data type %#x "
+                    "for comparison operation \"%s\" (%#x).",
+                    instruction->src[i].reg.data_type, i, src_data_type,
+                    vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
+    }
+}
+
+static void vsir_validate_double_comparison_operation(struct validation_context *ctx,
+        const struct vkd3d_shader_instruction *instruction)
+{
+    /* Type table handed to the shared comparison validator. DOUBLE is
+     * the only entry set; every other entry is implicitly false because
+     * the array has static storage. */
+    static const bool double_only[VKD3D_DATA_COUNT] = {[VKD3D_DATA_DOUBLE] = true};
+
+    vsir_validate_comparison_operation(ctx, instruction, double_only);
+}
+
 static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
 {
     size_t i;
@@ -10171,6 +11162,16 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[
     [VKD3DSIH_ASIN] =                             {1,   1, vsir_validate_float_elementwise_operation},
     [VKD3DSIH_ATAN] =                             {1,   1, vsir_validate_float_elementwise_operation},
     [VKD3DSIH_BRANCH] =                           {0, ~0u, vsir_validate_branch},
+    [VKD3DSIH_DADD] =                             {1,   2, vsir_validate_double_elementwise_operation},
+    [VKD3DSIH_DDIV] =                             {1,   2, vsir_validate_double_elementwise_operation},
+    [VKD3DSIH_DFMA] =                             {1,   3, vsir_validate_double_elementwise_operation},
+    [VKD3DSIH_DGEO] =                             {1,   2, vsir_validate_double_comparison_operation},
+    [VKD3DSIH_DIV] =                              {1,   2, vsir_validate_float_elementwise_operation},
+    [VKD3DSIH_DLT] =                              {1,   2, vsir_validate_double_comparison_operation},
+    [VKD3DSIH_DMAX] =                             {1,   2, vsir_validate_double_elementwise_operation},
+    [VKD3DSIH_DMIN] =                             {1,   2, vsir_validate_double_elementwise_operation},
+    [VKD3DSIH_DMOV] =                             {1,   1, vsir_validate_double_elementwise_operation},
+    [VKD3DSIH_DMUL] =                             {1,   2, vsir_validate_double_elementwise_operation},
     [VKD3DSIH_HS_CONTROL_POINT_PHASE] =           {0,   0, vsir_validate_hull_shader_phase},
     [VKD3DSIH_HS_DECLS] =                         {0,   0, vsir_validate_hull_shader_phase},
     [VKD3DSIH_HS_FORK_PHASE] =                    {0,   0, vsir_validate_hull_shader_phase},
diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
index 08519787b0a..ac9f7412d56 100644
--- a/libs/vkd3d/libs/vkd3d-shader/msl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
@@ -55,8 +55,14 @@ struct msl_generator
 
 struct msl_resource_type_info
 {
-    size_t read_coord_size;
+    /* The number of coordinates needed to address/sample the resource type. */
+    size_t coord_size;
+    /* Whether the resource type is an array type. */
     bool array;
+    /* Whether the resource type has a shadow/comparison variant. */
+    bool comparison;
+    /* The type suffix for the resource type. I.e., the "2d_ms" part of
+     * "texture2d_ms_array" or "depth2d_ms_array". */
     const char *type_suffix;
 };
 
@@ -78,17 +84,17 @@ static const struct msl_resource_type_info *msl_get_resource_type_info(enum vkd3
 {
     static const struct msl_resource_type_info info[] =
     {
-        [VKD3D_SHADER_RESOURCE_NONE]              = {0, 0, "none"},
-        [VKD3D_SHADER_RESOURCE_BUFFER]            = {1, 0, "_buffer"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_1D]        = {1, 0, "1d"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_2D]        = {2, 0, "2d"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS]      = {2, 0, "2d_ms"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_3D]        = {3, 0, "3d"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE]      = {2, 0, "cube"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY]   = {1, 1, "1d_array"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY]   = {2, 1, "2d_array"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, "2d_ms_array"},
-        [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {2, 1, "cube_array"},
+        [VKD3D_SHADER_RESOURCE_NONE]              = {0, 0, 0, "none"},
+        [VKD3D_SHADER_RESOURCE_BUFFER]            = {1, 0, 0, "_buffer"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_1D]        = {1, 0, 0, "1d"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_2D]        = {2, 0, 1, "2d"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS]      = {2, 0, 1, "2d_ms"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_3D]        = {3, 0, 0, "3d"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE]      = {3, 0, 1, "cube"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY]   = {1, 1, 0, "1d"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY]   = {2, 1, 1, "2d"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, 1, "2d_ms"},
+        [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {3, 1, 1, "cube"},
     };
 
     if (!t || t >= ARRAY_SIZE(info))
@@ -228,6 +234,35 @@ static const struct vkd3d_shader_descriptor_binding *msl_get_cbv_binding(const s
     return NULL;
 }
 
+/* Find the descriptor binding for the sampler at (register_space, register_idx).
+ * Only SAMPLER bindings accepted by msl_check_shader_visibility() are considered,
+ * and the first match is returned.
+ * Returns NULL if the generator has no interface info, or if nothing matches. */
+static const struct vkd3d_shader_descriptor_binding *msl_get_sampler_binding(const struct msl_generator *gen,
+        unsigned int register_space, unsigned int register_idx)
+{
+    const struct vkd3d_shader_interface_info *info = gen->interface_info;
+    const struct vkd3d_shader_resource_binding *candidate;
+    unsigned int n;
+
+    if (!info)
+        return NULL;
+
+    for (n = 0; n < info->binding_count; ++n)
+    {
+        candidate = &info->bindings[n];
+
+        /* The visibility check runs only for bindings that matched everything else. */
+        if (candidate->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER
+                && candidate->register_space == register_space
+                && candidate->register_index == register_idx
+                && msl_check_shader_visibility(gen, candidate->shader_visibility))
+            return &candidate->binding;
+    }
+
+    return NULL;
+}
+
 static const struct vkd3d_shader_descriptor_binding *msl_get_srv_binding(const struct msl_generator *gen,
         unsigned int register_space, unsigned int register_idx, enum vkd3d_shader_resource_type resource_type)
 {
@@ -267,11 +302,17 @@ static void msl_print_cbv_name(struct vkd3d_string_buffer *buffer, unsigned int
     vkd3d_string_buffer_printf(buffer, "descriptors[%u].buf<vkd3d_vec4>()", binding);
 }
 
+static void msl_print_sampler_name(struct vkd3d_string_buffer *buffer, unsigned int binding)
+{ /* Appends the expression that accesses entry "binding" of "descriptors" as a sampler. */
+    vkd3d_string_buffer_printf(buffer, "descriptors[%u].as<sampler>()", binding);
+}
+
 static void msl_print_srv_name(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, unsigned int binding,
-        const struct msl_resource_type_info *resource_type_info, enum vkd3d_data_type resource_data_type)
+        const struct msl_resource_type_info *resource_type_info, enum vkd3d_data_type resource_data_type, bool compare)
 {
-    vkd3d_string_buffer_printf(buffer, "descriptors[%u].tex<texture%s<",
-            binding, resource_type_info->type_suffix);
+    vkd3d_string_buffer_printf(buffer, "descriptors[%u].as<%s%s%s<",
+            binding, compare ? "depth" : "texture", resource_type_info->type_suffix,
+            resource_type_info->array ? "_array" : "");
     msl_print_resource_datatype(gen, buffer, resource_data_type);
     vkd3d_string_buffer_printf(buffer, ">>()");
 }
@@ -877,7 +918,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct
                 "Internal compiler error: Unhandled resource type %#x.", resource_type);
         resource_type_info = msl_get_resource_type_info(VKD3D_SHADER_RESOURCE_TEXTURE_2D);
     }
-    coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->read_coord_size);
+    coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
 
     if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type)))
     {
@@ -895,7 +936,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct
     read = vkd3d_string_buffer_get(&gen->string_buffers);
 
     vkd3d_string_buffer_printf(read, "as_type<uint4>(");
-    msl_print_srv_name(read, gen, srv_binding, resource_type_info, data_type);
+    msl_print_srv_name(read, gen, srv_binding, resource_type_info, data_type, false);
     vkd3d_string_buffer_printf(read, ".read(");
     msl_print_src_with_type(read, gen, &ins->src[0], coord_mask, VKD3D_DATA_UINT);
     if (resource_type_info->array)
@@ -920,6 +961,181 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct
     msl_dst_cleanup(&dst, &gen->string_buffers);
 }
 
+static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
+{ /* Emits an assignment of a texture sample()/sample_compare() expression for the SAMPLE* opcodes. */
+    const struct msl_resource_type_info *resource_type_info;
+    unsigned int resource_id, resource_idx, resource_space;
+    bool bias, compare, comparison_sampler, grad, lod_zero;
+    const struct vkd3d_shader_descriptor_binding *binding;
+    unsigned int sampler_id, sampler_idx, sampler_space;
+    const struct vkd3d_shader_descriptor_info1 *d;
+    enum vkd3d_shader_resource_type resource_type;
+    unsigned int srv_binding, sampler_binding;
+    struct vkd3d_string_buffer *sample;
+    enum vkd3d_data_type data_type;
+    uint32_t coord_mask;
+    struct msl_dst dst;
+
+    bias = ins->opcode == VKD3DSIH_SAMPLE_B;
+    compare = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+    grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD;
+    lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+
+    if (vkd3d_shader_instruction_has_texel_offset(ins))
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                "Internal compiler error: Unhandled texel sample offset.");
+
+    if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr
+            || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr)
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED,
+                "Descriptor indexing is not supported.");
+
+    resource_id = ins->src[1].reg.idx[0].offset; /* src[1] is the resource; src[2] is the sampler. */
+    resource_idx = ins->src[1].reg.idx[1].offset;
+    if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors,
+            VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id)))
+    {
+        resource_space = d->register_space;
+        resource_type = d->resource_type;
+        data_type = d->resource_data_type;
+    }
+    else
+    {
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                "Internal compiler error: Undeclared resource descriptor %u.", resource_id);
+        resource_space = 0; /* Fallback values, so that generation continues after the error. */
+        resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
+        data_type = VKD3D_DATA_FLOAT;
+    }
+
+    if (resource_type == VKD3D_SHADER_RESOURCE_BUFFER
+            || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS
+            || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY)
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED,
+                "Sampling resource type %#x is not supported.", resource_type);
+
+    if ((resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1D || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY)
+            && (bias || grad || lod_zero))
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED,
+                "Resource type %#x does not support mipmapping.", resource_type);
+
+    if (!(resource_type_info = msl_get_resource_type_info(resource_type)))
+    { /* Fall back to the 2D texture info after reporting the error. */
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                "Internal compiler error: Unhandled resource type %#x.", resource_type);
+        resource_type_info = msl_get_resource_type_info(VKD3D_SHADER_RESOURCE_TEXTURE_2D);
+    }
+    coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size);
+
+    if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type)))
+    {
+        srv_binding = binding->binding;
+    }
+    else
+    {
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND,
+                "No descriptor binding specified for SRV %u (index %u, space %u).",
+                resource_id, resource_idx, resource_space);
+        srv_binding = 0; /* Placeholder binding after the error. */
+    }
+
+    sampler_id = ins->src[2].reg.idx[0].offset;
+    sampler_idx = ins->src[2].reg.idx[1].offset;
+    if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors,
+            VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id)))
+    {
+        sampler_space = d->register_space;
+        comparison_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE;
+
+        if (compare) /* The sampler's comparison mode has to agree with the opcode. */
+        {
+            if (!comparison_sampler)
+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                        "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id);
+        }
+        else
+        {
+            if (comparison_sampler)
+                msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                        "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id);
+        }
+    }
+    else
+    {
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+                "Internal compiler error: Undeclared sampler descriptor %u.", sampler_id);
+        sampler_space = 0; /* Fallback value after the error. */
+    }
+
+    if ((binding = msl_get_sampler_binding(gen, sampler_space, sampler_idx)))
+    {
+        sampler_binding = binding->binding;
+    }
+    else
+    {
+        msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND,
+                "No descriptor binding specified for sampler %u (index %u, space %u).",
+                sampler_id, sampler_idx, sampler_space);
+        sampler_binding = 0; /* Placeholder binding after the error. */
+    }
+
+    msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+    sample = vkd3d_string_buffer_get(&gen->string_buffers);
+
+    if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT)
+        vkd3d_string_buffer_printf(sample, "as_type<uint4>("); /* Closed after the sample call below. */
+    msl_print_srv_name(sample, gen, srv_binding, resource_type_info, data_type, compare);
+    if (compare)
+        vkd3d_string_buffer_printf(sample, ".sample_compare(");
+    else
+        vkd3d_string_buffer_printf(sample, ".sample(");
+    msl_print_sampler_name(sample, sampler_binding);
+    vkd3d_string_buffer_printf(sample, ", ");
+    msl_print_src_with_type(sample, gen, &ins->src[0], coord_mask, ins->src[0].reg.data_type);
+    if (resource_type_info->array) /* Array index: presumably the component after the coordinates. */
+    {
+        vkd3d_string_buffer_printf(sample, ", uint(");
+        msl_print_src_with_type(sample, gen, &ins->src[0], coord_mask + 1, ins->src[0].reg.data_type);
+        vkd3d_string_buffer_printf(sample, ")");
+    }
+    if (compare) /* The reference value is the first component of src[3]. */
+    {
+        if (!resource_type_info->comparison)
+            msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED,
+                    "Comparison samplers are not supported with resource type %#x.", resource_type);
+        vkd3d_string_buffer_printf(sample, ", ");
+        msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
+    }
+    if (grad) /* The two gradient arguments come from src[3] and src[4]. */
+    {
+        vkd3d_string_buffer_printf(sample, ", gradient%s(", resource_type_info->type_suffix);
+        msl_print_src_with_type(sample, gen, &ins->src[3], coord_mask, ins->src[3].reg.data_type);
+        vkd3d_string_buffer_printf(sample, ", ");
+        msl_print_src_with_type(sample, gen, &ins->src[4], coord_mask, ins->src[4].reg.data_type);
+        vkd3d_string_buffer_printf(sample, ")");
+    }
+    if (lod_zero)
+    {
+        vkd3d_string_buffer_printf(sample, ", level(0.0f)");
+    }
+    if (bias) /* The bias is the first component of src[3]. */
+    {
+        vkd3d_string_buffer_printf(sample, ", bias(");
+        msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
+        vkd3d_string_buffer_printf(sample, ")");
+    }
+    vkd3d_string_buffer_printf(sample, ")");
+    if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT)
+        vkd3d_string_buffer_printf(sample, ")");
+    if (!compare) /* No swizzle is appended to a comparison result. */
+        msl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask);
+
+    msl_print_assignment(gen, &dst, "%s", sample->buffer);
+
+    vkd3d_string_buffer_release(&gen->string_buffers, sample);
+    msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
 static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op)
 {
     struct msl_src src;
@@ -1086,6 +1302,13 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d
         case VKD3DSIH_FTOU:
             msl_cast(gen, ins, "uint");
             break;
+        case VKD3DSIH_SAMPLE:
+        case VKD3DSIH_SAMPLE_B:
+        case VKD3DSIH_SAMPLE_C:
+        case VKD3DSIH_SAMPLE_C_LZ:
+        case VKD3DSIH_SAMPLE_GRAD:
+            msl_sample(gen, ins);
+            break;
         case VKD3DSIH_GEO:
         case VKD3DSIH_IGE:
             msl_relop(gen, ins, ">=");
@@ -1631,7 +1854,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader
                 "    const device void *ptr;\n"
                 "\n"
                 "    template<typename T>\n"
-                "    constant T &tex() constant\n"
+                "    constant T &as() constant\n"
                 "    {\n"
                 "        return reinterpret_cast<constant T &>(this->ptr);\n"
                 "    }\n"
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index 0413cd7c344..2bf6f5d9363 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -3522,8 +3522,12 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind
         goto done;
     }
 
-    resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER
-            ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE;
+    if (resource_type == VKD3D_SHADER_RESOURCE_NONE)
+        resource_type_flag = 0;
+    else if (resource_type == VKD3D_SHADER_RESOURCE_BUFFER)
+        resource_type_flag = VKD3D_SHADER_BINDING_FLAG_BUFFER;
+    else
+        resource_type_flag = VKD3D_SHADER_BINDING_FLAG_IMAGE;
 
     if (is_uav_counter)
     {
@@ -3567,7 +3571,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind
         {
             const struct vkd3d_shader_resource_binding *current = &shader_interface->bindings[i];
 
-            if (!(current->flags & resource_type_flag))
+            if ((current->flags & resource_type_flag) != resource_type_flag)
                 continue;
 
             if (!spirv_compiler_check_shader_visibility(compiler, current->shader_visibility))
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index ae3fa1650bf..01af2f6ebbd 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -4463,6 +4463,9 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struc
     size_t i;
     int ret;
 
+    if ((ret = vsir_allocate_temp_registers(program, message_context)))
+        return ret;
+
     tpf.program = program;
     tpf.buffer = NULL;
     tpf.stat = &stat;
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index cefd9f753a1..07e4b913e6f 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -1503,6 +1503,8 @@ struct vsir_program
     size_t block_name_count;
 };
 
+enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program,
+        struct vkd3d_shader_message_context *message_context);
 void vsir_program_cleanup(struct vsir_program *program);
 int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
         const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out,
@@ -1850,6 +1852,8 @@ static inline uint32_t vsir_write_mask_32_from_64(uint32_t write_mask64)
     }
 }
 
+const char *debug_vsir_writemask(unsigned int writemask);
+
 static inline uint32_t vsir_swizzle_64_from_32(uint32_t swizzle32)
 {
     switch (swizzle32)
@@ -1899,6 +1903,12 @@ static inline unsigned int vsir_swizzle_get_component(uint32_t swizzle, unsigned
     return (swizzle >> VKD3D_SHADER_SWIZZLE_SHIFT(idx)) & VKD3D_SHADER_SWIZZLE_MASK;
 }
 
+static inline void vsir_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component)
+{
+    const unsigned int shift = VKD3D_SHADER_SWIZZLE_SHIFT(idx); /* Bit position of component "idx". */
+    *swizzle = (*swizzle & ~(VKD3D_SHADER_SWIZZLE_MASK << shift)) | (component << shift);
+}
+
 static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t write_mask)
 {
     unsigned int i, compacted_swizzle = 0;
-- 
2.47.2