diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch index 4812bc1c..db0afa0c 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch @@ -1,4 +1,4 @@ -From c1f714e2b6dd6ea60e58506a85237fb3adee2467 Mon Sep 17 00:00:00 2001 +From a75eeda1df719b1399d103375cd68f2d983ed19d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to 166dc24b2f73b0541a14815081ee4c8d9eab3269. diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch index be871b78..88952ab6 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch @@ -1,4 +1,4 @@ -From b2202d65c0c1ea4fcd083e4fbacb7dfc1c2d7bca Mon Sep 17 00:00:00 2001 +From 5707fcd6181912494fa9dc59a21e6c906376f51f Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 28 Mar 2024 10:39:27 +1100 Subject: [PATCH] Updated vkd3d to b1eaf8327bf59b516f80e232e86332473ed97edc. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch index cb531e01..6fe0f391 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch @@ -1,4 +1,4 @@ -From 5785aacc06fe654c69632c9f95449015e379fa24 Mon Sep 17 00:00:00 2001 +From f5a216ff65b1a44151f1f2d7431922d11c311bb7 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 4 Apr 2024 09:47:35 +1100 Subject: [PATCH] Updated vkd3d to 4b0a328a2b58a86e3529ddcc2cdc785a08625f81. diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch index ea5b5f49..c7e685aa 100644 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch @@ -1,4 +1,4 @@ -From 315fe78a04bc6e851f29d38ab3ab82e725967ae1 Mon Sep 17 00:00:00 2001 +From 2af18a23afa158aa15c4655ee5ea87fdd7450b72 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 5 Apr 2024 08:20:45 +1100 Subject: [PATCH] Updated vkd3d to 9c0d04c86204fd360a7528faf2b53acc7301b598. diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch new file mode 100644 index 00000000..0326e249 --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch @@ -0,0 +1,5531 @@ +From 79df439b6169fac8d43c95411edbad8ee5a2c482 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 16 Apr 2024 12:05:29 +1000 +Subject: [PATCH] Updated vkd3d to 7d6f0f2592a8aedf749c2dff36ea330e9ccb49d1. + +--- + libs/vkd3d/Makefile.in | 1 + + libs/vkd3d/include/private/vkd3d_common.h | 6 + + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 528 +++--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 135 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 158 ++ + libs/vkd3d/libs/vkd3d-shader/fx.c | 150 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 104 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 77 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 495 ++++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 134 +- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 23 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 1451 ++++++++++------- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 35 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 35 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 25 + + libs/vkd3d/libs/vkd3d/cache.c | 59 + + libs/vkd3d/libs/vkd3d/device.c | 62 + + libs/vkd3d/libs/vkd3d/resource.c | 12 + + libs/vkd3d/libs/vkd3d/state.c | 28 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 7 + + 20 files changed, 2329 insertions(+), 1196 deletions(-) + create mode 100644 libs/vkd3d/libs/vkd3d/cache.c + +diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in +index 448e9a0e61d..94e4833dc9a 100644 +--- a/libs/vkd3d/Makefile.in ++++ b/libs/vkd3d/Makefile.in +@@ -30,6 +30,7 @@ SOURCES = \ + libs/vkd3d-shader/spirv.c \ + libs/vkd3d-shader/tpf.c \ + libs/vkd3d-shader/vkd3d_shader_main.c \ ++ libs/vkd3d/cache.c \ + libs/vkd3d/command.c \ + libs/vkd3d/device.c \ + libs/vkd3d/resource.c \ +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index 1cc8ecc38f3..f9df47d339c 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -438,6 +438,12 @@ struct vkd3d_mutex + #endif + }; + ++#ifdef _WIN32 ++#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}} ++#else ++#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER ++#endif ++ + static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) + { + #ifdef _WIN32 +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 2b5feb94103..459fdfc9abf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -330,37 +330,6 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_XOR ] = "xor", + }; + +-static const struct +-{ +- enum vkd3d_shader_input_sysval_semantic sysval_semantic; +- const char *sysval_name; +-} +-shader_input_sysval_semantic_names[] = +-{ +- {VKD3D_SIV_POSITION, "position"}, +- {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, +- {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, +- {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, +- {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, +- {VKD3D_SIV_VERTEX_ID, "vertex_id"}, +- {VKD3D_SIV_INSTANCE_ID, "instance_id"}, +- {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, +- {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, +- {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, +- {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, +- {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, +- {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, +- {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, +- {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, +- {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, +- {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, +- {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, +-}; +- + struct vkd3d_d3d_asm_colours + { + const char *reset; +@@ -615,21 +584,54 @@ static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); + } + +-static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_input_sysval_semantic semantic) ++static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix) + { + unsigned int i; + ++ static const struct ++ { ++ enum vkd3d_shader_input_sysval_semantic sysval_semantic; ++ const char *sysval_name; ++ } ++ shader_input_sysval_semantic_names[] = ++ { ++ {VKD3D_SIV_POSITION, "position"}, ++ {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, ++ {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, ++ {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, ++ {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, ++ {VKD3D_SIV_VERTEX_ID, "vertex_id"}, ++ {VKD3D_SIV_INSTANCE_ID, "instance_id"}, ++ {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, ++ {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, ++ {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, ++ {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, ++ {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, ++ {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, ++ {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, ++ }; ++ + for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i) + { +- if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic) +- { +- vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name); +- return; +- } ++ if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic) ++ continue; ++ ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", ++ prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix); ++ return; + } + +- vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic); ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix); + } + + static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) +@@ -704,124 +706,129 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil + vkd3d_string_buffer_printf(&compiler->buffer, ")"); + } + +-static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_semantic *semantic, uint32_t flags) ++static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ unsigned int usage_idx; ++ const char *usage; ++ bool indexed; + + if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) + { + switch (semantic->resource_type) + { + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: +- shader_addline(buffer, "_2d"); ++ usage = "2d"; + break; +- + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: +- shader_addline(buffer, "_volume"); ++ usage = "volume"; + break; +- + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: +- shader_addline(buffer, "_cube"); ++ usage = "cube"; + break; +- + default: +- shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix); ++ return; + } ++ ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); ++ return; + } +- else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) ++ ++ if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) + { ++ vkd3d_string_buffer_printf(buffer, "%s", prefix); + if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) +- shader_addline(buffer, "_resource"); ++ vkd3d_string_buffer_printf(buffer, "resource_"); + +- shader_addline(buffer, "_"); + shader_dump_resource_type(compiler, semantic->resource_type); + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + { +- shader_addline(buffer, "(%u)", semantic->sample_count); ++ vkd3d_string_buffer_printf(buffer, "(%u)", semantic->sample_count); + } + if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) + shader_dump_uav_flags(compiler, flags); +- shader_addline(buffer, " "); ++ vkd3d_string_buffer_printf(buffer, " "); + shader_dump_resource_data_type(compiler, semantic->resource_data_type); ++ vkd3d_string_buffer_printf(buffer, "%s", suffix); ++ return; + } +- else +- { +- /* Pixel shaders 3.0 don't have usage semantics. */ +- if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) +- && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) +- return; +- else +- shader_addline(buffer, "_"); +- +- switch (semantic->usage) +- { +- case VKD3D_DECL_USAGE_POSITION: +- shader_addline(buffer, "position%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_BLEND_INDICES: +- shader_addline(buffer, "blend"); +- break; +- +- case VKD3D_DECL_USAGE_BLEND_WEIGHT: +- shader_addline(buffer, "weight"); +- break; +- +- case VKD3D_DECL_USAGE_NORMAL: +- shader_addline(buffer, "normal%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_PSIZE: +- shader_addline(buffer, "psize"); +- break; +- +- case VKD3D_DECL_USAGE_COLOR: +- if (!semantic->usage_idx) +- shader_addline(buffer, "color"); +- else +- shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); +- break; +- +- case VKD3D_DECL_USAGE_TEXCOORD: +- shader_addline(buffer, "texcoord%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_TANGENT: +- shader_addline(buffer, "tangent"); +- break; +- +- case VKD3D_DECL_USAGE_BINORMAL: +- shader_addline(buffer, "binormal"); +- break; +- +- case VKD3D_DECL_USAGE_TESS_FACTOR: +- shader_addline(buffer, "tessfactor"); +- break; +- +- case VKD3D_DECL_USAGE_POSITIONT: +- shader_addline(buffer, "positionT%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_FOG: +- shader_addline(buffer, "fog"); +- break; + +- case VKD3D_DECL_USAGE_DEPTH: +- shader_addline(buffer, "depth"); +- break; ++ /* Pixel shaders 3.0 don't have usage semantics. */ ++ if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) ++ && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ++ return; + +- case VKD3D_DECL_USAGE_SAMPLE: +- shader_addline(buffer, "sample"); ++ indexed = false; ++ usage_idx = semantic->usage_idx; ++ switch (semantic->usage) ++ { ++ case VKD3D_DECL_USAGE_POSITION: ++ usage = "position"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_BLEND_INDICES: ++ usage = "blend"; ++ break; ++ case VKD3D_DECL_USAGE_BLEND_WEIGHT: ++ usage = "weight"; ++ break; ++ case VKD3D_DECL_USAGE_NORMAL: ++ usage = "normal"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_PSIZE: ++ usage = "psize"; ++ break; ++ case VKD3D_DECL_USAGE_COLOR: ++ if (semantic->usage_idx) ++ { ++ usage = "specular"; ++ indexed = true; ++ --usage_idx; + break; +- +- default: +- shader_addline(buffer, "", semantic->usage); +- FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); +- } ++ } ++ usage = "color"; ++ break; ++ case VKD3D_DECL_USAGE_TEXCOORD: ++ usage = "texcoord"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_TANGENT: ++ usage = "tangent"; ++ break; ++ case VKD3D_DECL_USAGE_BINORMAL: ++ usage = "binormal"; ++ break; ++ case VKD3D_DECL_USAGE_TESS_FACTOR: ++ usage = "tessfactor"; ++ break; ++ case VKD3D_DECL_USAGE_POSITIONT: ++ usage = "positionT"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_FOG: ++ usage = "fog"; ++ break; ++ case VKD3D_DECL_USAGE_DEPTH: ++ usage = "depth"; ++ break; ++ case VKD3D_DECL_USAGE_SAMPLE: ++ usage = "sample"; ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix); ++ return; + } ++ ++ if (indexed) ++ vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix); ++ else ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); + } + + static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, +@@ -937,8 +944,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler + vkd3d_string_buffer_printf(&compiler->buffer, "*]"); + } + +-static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg, +- bool is_declaration) ++static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, ++ const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int offset = reg->idx[0].offset; +@@ -947,22 +954,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; + static const char * const misctype_reg_names[] = {"vPos", "vFace"}; + +- shader_addline(buffer, "%s", reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); ++ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, ++ reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); + switch (reg->type) + { + case VKD3DSPR_TEMP: +- shader_addline(buffer, "r"); ++ vkd3d_string_buffer_printf(buffer, "r"); + break; + + case VKD3DSPR_INPUT: +- shader_addline(buffer, "v"); ++ vkd3d_string_buffer_printf(buffer, "v"); + break; + + case VKD3DSPR_CONST: + case VKD3DSPR_CONST2: + case VKD3DSPR_CONST3: + case VKD3DSPR_CONST4: +- shader_addline(buffer, "c"); ++ vkd3d_string_buffer_printf(buffer, "c"); + offset = shader_get_float_offset(reg->type, offset); + break; + +@@ -972,205 +980,202 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + break; + + case VKD3DSPR_RASTOUT: +- shader_addline(buffer, "%s", rastout_reg_names[offset]); ++ vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); + break; + + case VKD3DSPR_COLOROUT: +- shader_addline(buffer, "oC"); ++ vkd3d_string_buffer_printf(buffer, "oC"); + break; + + case VKD3DSPR_DEPTHOUT: +- shader_addline(buffer, "oDepth"); ++ vkd3d_string_buffer_printf(buffer, "oDepth"); + break; + + case VKD3DSPR_DEPTHOUTGE: +- shader_addline(buffer, "oDepthGE"); ++ vkd3d_string_buffer_printf(buffer, "oDepthGE"); + break; + + case VKD3DSPR_DEPTHOUTLE: +- shader_addline(buffer, "oDepthLE"); ++ vkd3d_string_buffer_printf(buffer, "oDepthLE"); + break; + + case VKD3DSPR_ATTROUT: +- shader_addline(buffer, "oD"); ++ vkd3d_string_buffer_printf(buffer, "oD"); + break; + + case VKD3DSPR_TEXCRDOUT: + /* Vertex shaders >= 3.0 use general purpose output registers + * (VKD3DSPR_OUTPUT), which can include an address token. */ + if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) +- shader_addline(buffer, "o"); ++ vkd3d_string_buffer_printf(buffer, "o"); + else +- shader_addline(buffer, "oT"); ++ vkd3d_string_buffer_printf(buffer, "oT"); + break; + + case VKD3DSPR_CONSTINT: +- shader_addline(buffer, "i"); ++ vkd3d_string_buffer_printf(buffer, "i"); + break; + + case VKD3DSPR_CONSTBOOL: +- shader_addline(buffer, "b"); ++ vkd3d_string_buffer_printf(buffer, "b"); + break; + + case VKD3DSPR_LABEL: +- shader_addline(buffer, "l"); ++ vkd3d_string_buffer_printf(buffer, "l"); + break; + + case VKD3DSPR_LOOP: +- shader_addline(buffer, "aL"); ++ vkd3d_string_buffer_printf(buffer, "aL"); + break; + + case VKD3DSPR_COMBINED_SAMPLER: + case VKD3DSPR_SAMPLER: +- shader_addline(buffer, "s"); ++ vkd3d_string_buffer_printf(buffer, "s"); + is_descriptor = true; + break; + + case VKD3DSPR_MISCTYPE: + if (offset > 1) +- { +- FIXME("Unhandled misctype register %u.\n", offset); +- shader_addline(buffer, "", offset); +- } ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, offset, compiler->colours.reset); + else +- { +- shader_addline(buffer, "%s", misctype_reg_names[offset]); +- } ++ vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); + break; + + case VKD3DSPR_PREDICATE: +- shader_addline(buffer, "p"); ++ vkd3d_string_buffer_printf(buffer, "p"); + break; + + case VKD3DSPR_IMMCONST: +- shader_addline(buffer, "l"); ++ vkd3d_string_buffer_printf(buffer, "l"); + break; + + case VKD3DSPR_IMMCONST64: +- shader_addline(buffer, "d"); ++ vkd3d_string_buffer_printf(buffer, "d"); + break; + + case VKD3DSPR_CONSTBUFFER: +- shader_addline(buffer, "cb"); ++ vkd3d_string_buffer_printf(buffer, "cb"); + is_descriptor = true; + break; + + case VKD3DSPR_IMMCONSTBUFFER: +- shader_addline(buffer, "icb"); ++ vkd3d_string_buffer_printf(buffer, "icb"); + break; + + case VKD3DSPR_PRIMID: +- shader_addline(buffer, "primID"); ++ vkd3d_string_buffer_printf(buffer, "primID"); + break; + + case VKD3DSPR_NULL: +- shader_addline(buffer, "null"); ++ vkd3d_string_buffer_printf(buffer, "null"); + break; + + case VKD3DSPR_RASTERIZER: +- shader_addline(buffer, "rasterizer"); ++ vkd3d_string_buffer_printf(buffer, "rasterizer"); + break; + + case VKD3DSPR_RESOURCE: +- shader_addline(buffer, "t"); ++ vkd3d_string_buffer_printf(buffer, "t"); + is_descriptor = true; + break; + + case VKD3DSPR_UAV: +- shader_addline(buffer, "u"); ++ vkd3d_string_buffer_printf(buffer, "u"); + is_descriptor = true; + break; + + case VKD3DSPR_OUTPOINTID: +- shader_addline(buffer, "vOutputControlPointID"); ++ vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); + break; + + case VKD3DSPR_FORKINSTID: +- shader_addline(buffer, "vForkInstanceId"); ++ vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); + break; + + case VKD3DSPR_JOININSTID: +- shader_addline(buffer, "vJoinInstanceId"); ++ vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); + break; + + case VKD3DSPR_INCONTROLPOINT: +- shader_addline(buffer, "vicp"); ++ vkd3d_string_buffer_printf(buffer, "vicp"); + break; + + case VKD3DSPR_OUTCONTROLPOINT: +- shader_addline(buffer, "vocp"); ++ vkd3d_string_buffer_printf(buffer, "vocp"); + break; + + case VKD3DSPR_PATCHCONST: +- shader_addline(buffer, "vpc"); ++ vkd3d_string_buffer_printf(buffer, "vpc"); + break; + + case VKD3DSPR_TESSCOORD: +- shader_addline(buffer, "vDomainLocation"); ++ vkd3d_string_buffer_printf(buffer, "vDomainLocation"); + break; + + case VKD3DSPR_GROUPSHAREDMEM: +- shader_addline(buffer, "g"); ++ vkd3d_string_buffer_printf(buffer, "g"); + break; + + case VKD3DSPR_THREADID: +- shader_addline(buffer, "vThreadID"); ++ vkd3d_string_buffer_printf(buffer, "vThreadID"); + break; + + case VKD3DSPR_THREADGROUPID: +- shader_addline(buffer, "vThreadGroupID"); ++ vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); + break; + + case VKD3DSPR_LOCALTHREADID: +- shader_addline(buffer, "vThreadIDInGroup"); ++ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); + break; + + case VKD3DSPR_LOCALTHREADINDEX: +- shader_addline(buffer, "vThreadIDInGroupFlattened"); ++ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); + break; + + case VKD3DSPR_IDXTEMP: +- shader_addline(buffer, "x"); ++ vkd3d_string_buffer_printf(buffer, "x"); + break; + + case VKD3DSPR_STREAM: +- shader_addline(buffer, "m"); ++ vkd3d_string_buffer_printf(buffer, "m"); + break; + + case VKD3DSPR_FUNCTIONBODY: +- shader_addline(buffer, "fb"); ++ vkd3d_string_buffer_printf(buffer, "fb"); + break; + + case VKD3DSPR_FUNCTIONPOINTER: +- shader_addline(buffer, "fp"); ++ vkd3d_string_buffer_printf(buffer, "fp"); + break; + + case VKD3DSPR_COVERAGE: +- shader_addline(buffer, "vCoverage"); ++ vkd3d_string_buffer_printf(buffer, "vCoverage"); + break; + + case VKD3DSPR_SAMPLEMASK: +- shader_addline(buffer, "oMask"); ++ vkd3d_string_buffer_printf(buffer, "oMask"); + break; + + case VKD3DSPR_GSINSTID: +- shader_addline(buffer, "vGSInstanceID"); ++ vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); + break; + + case VKD3DSPR_OUTSTENCILREF: +- shader_addline(buffer, "oStencilRef"); ++ vkd3d_string_buffer_printf(buffer, "oStencilRef"); + break; + + case VKD3DSPR_UNDEF: +- shader_addline(buffer, "undef"); ++ vkd3d_string_buffer_printf(buffer, "undef"); + break; + + case VKD3DSPR_SSA: +- shader_addline(buffer, "sr"); ++ vkd3d_string_buffer_printf(buffer, "sr"); + break; + + default: +- shader_addline(buffer, "", reg->type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->type, compiler->colours.reset); + break; + } + +@@ -1189,7 +1194,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + break; + } + +- shader_addline(buffer, "%s(", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: +@@ -1210,7 +1215,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); + break; + default: +- shader_addline(buffer, "", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->data_type, compiler->colours.reset); + break; + } + break; +@@ -1249,20 +1255,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); + break; + default: +- shader_addline(buffer, "", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->data_type, compiler->colours.reset); + break; + } + break; + + default: +- shader_addline(buffer, "", reg->dimension); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->dimension, compiler->colours.reset); + break; + } +- shader_addline(buffer, ")"); ++ vkd3d_string_buffer_printf(buffer, ")"); + } + else if (reg->type == VKD3DSPR_IMMCONST64) + { +- shader_addline(buffer, "%s(", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); + /* A double2 vector is treated as a float4 vector in enum vsir_dimension. */ + if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) + { +@@ -1280,14 +1288,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + } + else + { +- shader_addline(buffer, "", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->data_type, compiler->colours.reset); + } + } + else + { +- shader_addline(buffer, "", reg->dimension); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->dimension, compiler->colours.reset); + } +- shader_addline(buffer, ")"); ++ vkd3d_string_buffer_printf(buffer, ")"); + } + else if (reg->type != VKD3DSPR_RASTOUT + && reg->type != VKD3DSPR_MISCTYPE +@@ -1331,7 +1341,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + } + else + { +- shader_addline(buffer, "%s", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + } + + if (reg->type == VKD3DSPR_FUNCTIONPOINTER) +@@ -1339,8 +1349,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + } + else + { +- shader_addline(buffer, "%s", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + } ++ vkd3d_string_buffer_printf(buffer, "%s", suffix); + } + + static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) +@@ -1384,8 +1395,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co + compiler->colours.modifier, compiler->colours.reset); + } + +-static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_register *reg) ++static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) + { + static const char *dimensions[] = + { +@@ -1398,7 +1409,10 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, + const char *dimension; + + if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) ++ { ++ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix); + return; ++ } + + if (reg->data_type == VKD3D_DATA_UNUSED) + return; +@@ -1408,9 +1422,9 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, + else + dimension = "??"; + +- shader_addline(buffer, " <%s", dimension); ++ vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); + shader_dump_data_type(compiler, reg->data_type); +- shader_addline(buffer, ">"); ++ vkd3d_string_buffer_printf(buffer, ">%s", suffix); + } + + static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1444,8 +1458,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, + { + uint32_t write_mask = param->write_mask; + +- vkd3d_string_buffer_printf(&compiler->buffer, "%s", prefix); +- shader_dump_register(compiler, ¶m->reg, is_declaration); ++ shader_print_register(compiler, prefix, ¶m->reg, is_declaration, ""); + + if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) + { +@@ -1457,8 +1470,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, + + shader_print_precision(compiler, ¶m->reg); + shader_print_non_uniform(compiler, ¶m->reg); +- shader_dump_reg_type(compiler, ¶m->reg); +- vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffix); ++ shader_print_reg_type(compiler, "", ¶m->reg, suffix); + } + + static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1468,6 +1480,7 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, + struct vkd3d_string_buffer *buffer = &compiler->buffer; + uint32_t swizzle = param->swizzle; + const char *modifier = ""; ++ bool is_abs = false; + + if (src_modifier == VKD3DSPSM_NEG + || src_modifier == VKD3DSPSM_BIASNEG +@@ -1482,9 +1495,9 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); + + if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) +- vkd3d_string_buffer_printf(buffer, "|"); ++ is_abs = true; + +- shader_dump_register(compiler, ¶m->reg, false); ++ shader_print_register(compiler, is_abs ? "|" : "", ¶m->reg, false, ""); + + switch (src_modifier) + { +@@ -1543,13 +1556,12 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, + swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); + } + +- if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) ++ if (is_abs) + vkd3d_string_buffer_printf(buffer, "|"); + + shader_print_precision(compiler, ¶m->reg); + shader_print_non_uniform(compiler, ¶m->reg); +- shader_dump_reg_type(compiler, ¶m->reg); +- vkd3d_string_buffer_printf(buffer, "%s", suffix); ++ shader_print_reg_type(compiler, "", ¶m->reg, suffix); + } + + static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1578,87 +1590,93 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, + if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); + } + +-static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_primitive_type *primitive_type) ++static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *primitive_type; + +- switch (primitive_type->type) ++ switch (p->type) + { + case VKD3D_PT_UNDEFINED: +- shader_addline(buffer, "undefined"); ++ primitive_type = "undefined"; + break; + case VKD3D_PT_POINTLIST: +- shader_addline(buffer, "pointlist"); ++ primitive_type = "pointlist"; + break; + case VKD3D_PT_LINELIST: +- shader_addline(buffer, "linelist"); ++ primitive_type = "linelist"; + break; + case VKD3D_PT_LINESTRIP: +- shader_addline(buffer, "linestrip"); ++ primitive_type = "linestrip"; + break; + case VKD3D_PT_TRIANGLELIST: +- shader_addline(buffer, "trianglelist"); ++ primitive_type = "trianglelist"; + break; + case VKD3D_PT_TRIANGLESTRIP: +- shader_addline(buffer, "trianglestrip"); ++ primitive_type = "trianglestrip"; + break; + case VKD3D_PT_TRIANGLEFAN: +- shader_addline(buffer, "trianglefan"); ++ primitive_type = "trianglefan"; + break; + case VKD3D_PT_LINELIST_ADJ: +- shader_addline(buffer, "linelist_adj"); ++ primitive_type = "linelist_adj"; + break; + case VKD3D_PT_LINESTRIP_ADJ: +- shader_addline(buffer, "linestrip_adj"); ++ primitive_type = "linestrip_adj"; + break; + case VKD3D_PT_TRIANGLELIST_ADJ: +- shader_addline(buffer, "trianglelist_adj"); ++ primitive_type = "trianglelist_adj"; + break; + case VKD3D_PT_TRIANGLESTRIP_ADJ: +- shader_addline(buffer, "trianglestrip_adj"); ++ primitive_type = "trianglestrip_adj"; + break; + case VKD3D_PT_PATCH: +- shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); +- break; ++ vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix); ++ return; + default: +- shader_addline(buffer, "", primitive_type->type); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix); ++ return; + } ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix); + } + +-static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_interpolation_mode interpolation_mode) ++static void shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *mode; + +- switch (interpolation_mode) ++ switch (m) + { + case VKD3DSIM_CONSTANT: +- shader_addline(buffer, "constant"); ++ mode = "constant"; + break; + case VKD3DSIM_LINEAR: +- shader_addline(buffer, "linear"); ++ mode = "linear"; + break; + case VKD3DSIM_LINEAR_CENTROID: +- shader_addline(buffer, "linear centroid"); ++ mode = "linear centroid"; + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE: +- shader_addline(buffer, "linear noperspective"); ++ mode = "linear noperspective"; + break; + case VKD3DSIM_LINEAR_SAMPLE: +- shader_addline(buffer, "linear sample"); ++ mode = "linear sample"; + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: +- shader_addline(buffer, "linear noperspective centroid"); ++ mode = "linear noperspective centroid"; + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: +- shader_addline(buffer, "linear noperspective sample"); ++ mode = "linear noperspective sample"; + break; + default: +- shader_addline(buffer, "", interpolation_mode); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, m, compiler->colours.reset, suffix); ++ return; + } ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix); + } + + const char *shader_get_type_prefix(enum vkd3d_shader_type type) +@@ -1849,16 +1867,15 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); +- shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); ++ shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); + shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); +- vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); +- shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); ++ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); ++ shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); + shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); + break; + + case VKD3DSIH_DCL_CONSTANT_BUFFER: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); ++ shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); + if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) + shader_print_subscript(compiler, ins->declaration.cb.size, NULL); + else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) +@@ -1906,8 +1923,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3DSIH_DCL_INPUT_PS: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_interpolation_mode(compiler, ins->flags); ++ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); + shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); + break; + +@@ -1916,16 +1932,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); +- shader_addline(buffer, ", "); +- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); ++ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); + break; + + case VKD3DSIH_DCL_INPUT_PS_SIV: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_interpolation_mode(compiler, ins->flags); ++ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); + shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); +- shader_addline(buffer, ", "); +- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); ++ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); + break; + + case VKD3DSIH_DCL_INPUT: +@@ -1935,8 +1948,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); ++ shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, ""); + break; + + case VKD3DSIH_DCL_INTERFACE: +@@ -1958,10 +1970,8 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3DSIH_DCL_SAMPLER: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); +- if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) +- shader_addline(buffer, ", comparisonMode"); ++ shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, ++ ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : ""); + shader_dump_register_space(compiler, ins->declaration.sampler.range.space); + break; + +@@ -2354,6 +2364,10 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + break; + + case VKD3DSIH_LABEL: ++ case VKD3DSIH_HS_DECLS: ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: + indent = 0; + break; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 099729fbb6c..4685afa082d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1522,63 +1522,94 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + + D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + { +- switch (type->base_type) ++ switch (type->class) + { +- case HLSL_TYPE_BOOL: +- return D3DXPT_BOOL; +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3DXPT_FLOAT; +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return D3DXPT_INT; +- case HLSL_TYPE_PIXELSHADER: +- return D3DXPT_PIXELSHADER; +- case HLSL_TYPE_SAMPLER: +- switch (type->sampler_dim) ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ switch (type->base_type) + { +- case HLSL_SAMPLER_DIM_1D: +- return D3DXPT_SAMPLER1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3DXPT_SAMPLER2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3DXPT_SAMPLER3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3DXPT_SAMPLERCUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3DXPT_SAMPLER; ++ case HLSL_TYPE_BOOL: ++ return D3DXPT_BOOL; ++ /* Actually double behaves differently depending on DLL version: ++ * For <= 36, it maps to D3DXPT_FLOAT. ++ * For 37-40, it maps to zero (D3DXPT_VOID). ++ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* ++ * values are mostly compatible with D3DXPT_*). ++ * However, the latter two cases look like bugs, and a reasonable ++ * application certainly wouldn't know what to do with them. ++ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ ++ case HLSL_TYPE_DOUBLE: ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ return D3DXPT_FLOAT; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return D3DXPT_INT; + default: +- ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } +- break; +- case HLSL_TYPE_STRING: +- return D3DXPT_STRING; +- case HLSL_TYPE_TEXTURE: +- switch (type->sampler_dim) ++ ++ case HLSL_CLASS_OBJECT: ++ switch (type->base_type) + { +- case HLSL_SAMPLER_DIM_1D: +- return D3DXPT_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3DXPT_TEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3DXPT_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3DXPT_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3DXPT_TEXTURE; ++ case HLSL_TYPE_PIXELSHADER: ++ return D3DXPT_PIXELSHADER; ++ case HLSL_TYPE_SAMPLER: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3DXPT_SAMPLER1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3DXPT_SAMPLER2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3DXPT_SAMPLER3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3DXPT_SAMPLERCUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3DXPT_SAMPLER; ++ default: ++ ERR("Invalid dimension %#x.\n", type->sampler_dim); ++ vkd3d_unreachable(); ++ } ++ break; ++ case HLSL_TYPE_STRING: ++ return D3DXPT_STRING; ++ case HLSL_TYPE_TEXTURE: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3DXPT_TEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3DXPT_TEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3DXPT_TEXTURE3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3DXPT_TEXTURECUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3DXPT_TEXTURE; ++ default: ++ ERR("Invalid dimension %#x.\n", type->sampler_dim); ++ vkd3d_unreachable(); ++ } ++ break; ++ case HLSL_TYPE_VERTEXSHADER: ++ return D3DXPT_VERTEXSHADER; ++ case HLSL_TYPE_VOID: ++ return D3DXPT_VOID; + default: +- ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } +- break; +- case HLSL_TYPE_VERTEXSHADER: +- return D3DXPT_VERTEXSHADER; +- case HLSL_TYPE_VOID: +- return D3DXPT_VOID; +- default: + vkd3d_unreachable(); ++ ++ case HLSL_CLASS_ARRAY: ++ return hlsl_sm1_base_type(type->e.array.type); ++ ++ case HLSL_CLASS_STRUCT: ++ return D3DXPT_VOID; + } ++ ++ vkd3d_unreachable(); + } + + static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +@@ -2572,19 +2603,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + { + if (instr->data_type) + { +- if (instr->data_type->class == HLSL_CLASS_MATRIX) ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { +- /* These need to be lowered. */ +- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); +- continue; +- } +- else if (instr->data_type->class == HLSL_CLASS_OBJECT) +- { +- hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } +- +- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + } + + switch (instr->type) +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index b5a61d99d3f..da8ba662dbc 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -404,6 +404,7 @@ enum dx_intrinsic_opcode + DX_ATOMIC_BINOP = 78, + DX_ATOMIC_CMP_XCHG = 79, + DX_BARRIER = 80, ++ DX_CALCULATE_LOD = 81, + DX_DISCARD = 82, + DX_DERIV_COARSEX = 83, + DX_DERIV_COARSEY = 84, +@@ -2885,6 +2886,122 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co + return VKD3D_OK; + } + ++static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct sm6_value *dst) ++{ ++ const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type; ++ struct sm6_value *operands[3]; ++ unsigned int i, j, offset; ++ uint64_t value; ++ ++ i = 0; ++ pointee_type = (record->operand_count & 1) ? sm6_parser_get_type(sm6, record->operands[i++]) : NULL; ++ ++ if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ for (j = 0; i < record->operand_count; i += 2, ++j) ++ { ++ if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i]))) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((value = record->operands[i + 1]) >= sm6->cur_max_value) ++ { ++ WARN("Invalid value index %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value index %"PRIu64".", value); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ else if (value == sm6->value_count) ++ { ++ WARN("Invalid value self-reference at %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value self-reference for a constexpr GEP."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ operands[j] = &sm6->values[value]; ++ if (value > sm6->value_count) ++ { ++ operands[j]->type = elem_type; ++ } ++ else if (operands[j]->type != elem_type) ++ { ++ WARN("Type mismatch.\n"); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, ++ "Type mismatch in constexpr GEP elements."); ++ } ++ } ++ ++ if (operands[0]->u.reg.idx_count > 1) ++ { ++ WARN("Unsupported stacked GEP.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A GEP instruction on the result of a previous GEP is unsupported."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!sm6_value_is_constant_zero(operands[1])) ++ { ++ WARN("Expected constant zero.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "The pointer dereference index for a constexpr GEP instruction is not constant zero."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) ++ { ++ WARN("Element index is not constant int.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A constexpr GEP element index is not a constant integer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ dst->structure_stride = operands[0]->structure_stride; ++ ++ ptr_type = operands[0]->type; ++ if (!sm6_type_is_pointer(ptr_type)) ++ { ++ WARN("Constexpr GEP base value is not a pointer.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A constexpr GEP base value is not a pointer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!pointee_type) ++ { ++ pointee_type = ptr_type->u.pointer.type; ++ } ++ else if (pointee_type != ptr_type->u.pointer.type) ++ { ++ WARN("Explicit pointee type mismatch.\n"); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, ++ "Explicit pointee type for constexpr GEP does not match the element type."); ++ } ++ ++ offset = sm6_value_get_constant_uint(operands[2]); ++ if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) ++ { ++ WARN("Failed to get element type.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Failed to get the element type of a constexpr GEP."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6))) ++ { ++ WARN("Failed to get pointer type for type %u.\n", gep_type->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Module does not define a pointer type for a constexpr GEP result."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ dst->u.reg = operands[0]->u.reg; ++ dst->u.reg.idx[1].offset = offset; ++ dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; ++ dst->u.reg.idx_count = 2; ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) + { + enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; +@@ -3005,6 +3122,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + + break; + ++ case CST_CODE_CE_GEP: ++ case CST_CODE_CE_INBOUNDS_GEP: ++ if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) ++ return ret; ++ break; ++ + case CST_CODE_UNDEF: + dxil_record_validate_operand_max_count(record, 0, sm6); + dst->u.reg.type = VKD3DSPR_UNDEF; +@@ -4364,6 +4487,40 @@ static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enu + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ const struct sm6_value *resource, *sampler; ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_register coord; ++ unsigned int clamp; ++ ++ resource = operands[0]; ++ sampler = operands[1]; ++ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) ++ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) ++ { ++ return; ++ } ++ ++ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord)) ++ return; ++ ++ clamp = sm6_value_get_constant_uint(operands[5]); ++ ++ ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD); ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ src_param_init_vector_from_reg(&src_params[0], &coord); ++ src_params[1].reg = resource->u.handle.reg; ++ src_param_init_scalar(&src_params[1], !clamp); ++ src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -5392,6 +5549,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, + [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, + [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, ++ [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod}, + [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, + [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, + [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 466908cd82b..1d90cd70e03 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -115,6 +115,9 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) + + static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) + { ++ if (var->state_block_count) ++ hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); ++ + fx->ops->write_pass(var, fx); + } + +@@ -401,14 +404,6 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + uint32_t name_offset, offset, size, stride, numeric_desc; + uint32_t elements_count = 0; + const char *name; +- static const uint32_t variable_type[] = +- { +- [HLSL_CLASS_SCALAR] = 1, +- [HLSL_CLASS_VECTOR] = 1, +- [HLSL_CLASS_MATRIX] = 1, +- [HLSL_CLASS_OBJECT] = 2, +- [HLSL_CLASS_STRUCT] = 3, +- }; + struct hlsl_ctx *ctx = fx->ctx; + + /* Resolve arrays to element type and number of elements. */ +@@ -428,13 +423,19 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: ++ put_u32_unaligned(buffer, 1); ++ break; ++ + case HLSL_CLASS_OBJECT: ++ put_u32_unaligned(buffer, 2); ++ break; ++ + case HLSL_CLASS_STRUCT: +- put_u32_unaligned(buffer, variable_type[type->class]); ++ put_u32_unaligned(buffer, 3); + break; +- default: +- hlsl_fixme(ctx, &ctx->location, "Writing type class %u is not implemented.", type->class); +- return 0; ++ ++ case HLSL_CLASS_ARRAY: ++ vkd3d_unreachable(); + } + + size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); +@@ -630,7 +631,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + { + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t semantic_offset, offset, elements_count = 0, name_offset; +- struct hlsl_ctx *ctx = fx->ctx; + size_t i; + + /* Resolve arrays to element type and number of elements. */ +@@ -643,22 +643,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + name_offset = write_string(name, fx); + semantic_offset = write_string(semantic->name, fx); + +- switch (type->base_type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_VOID: +- case HLSL_TYPE_TEXTURE: +- break; +- default: +- hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.", +- type->base_type); +- return 0; +- }; +- + offset = put_u32(buffer, hlsl_sm1_base_type(type)); + put_u32(buffer, hlsl_sm1_class(type)); + put_u32(buffer, name_offset); +@@ -688,6 +672,9 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; ++ ++ /* Validated in check_invalid_object_fields(). */ ++ assert(hlsl_is_numeric_type(field->type)); + write_fx_2_parameter(field->type, field->name, &field->semantic, fx); + } + } +@@ -746,7 +733,7 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f + { + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + const struct hlsl_type *type = var->data_type; +- uint32_t i, offset, size, elements_count = 1; ++ uint32_t offset, size, elements_count = 1; + + size = get_fx_2_type_size(type); + +@@ -756,63 +743,80 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f + type = hlsl_get_multiarray_element_type(type); + } + +- if (type->class == HLSL_CLASS_OBJECT) +- { +- /* Objects are given sequential ids. */ +- offset = put_u32(buffer, fx->object_variable_count++); +- for (i = 1; i < elements_count; ++i) +- put_u32(buffer, fx->object_variable_count++); +- } +- else ++ /* Note that struct fields must all be numeric; ++ * this was validated in check_invalid_object_fields(). */ ++ switch (type->class) + { +- /* FIXME: write actual initial value */ +- offset = put_u32(buffer, 0); ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ case HLSL_CLASS_STRUCT: ++ /* FIXME: write actual initial value */ ++ offset = put_u32(buffer, 0); + +- for (i = 1; i < size / sizeof(uint32_t); ++i) +- put_u32(buffer, 0); ++ for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) ++ put_u32(buffer, 0); ++ break; ++ ++ default: ++ /* Objects are given sequential ids. */ ++ offset = put_u32(buffer, fx->object_variable_count++); ++ for (uint32_t i = 1; i < elements_count; ++i) ++ put_u32(buffer, fx->object_variable_count++); ++ break; + } + + return offset; + } + +-static bool is_type_supported_fx_2(const struct hlsl_type *type) ++static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type, ++ const struct vkd3d_shader_location *loc) + { +- type = hlsl_get_multiarray_element_type(type); +- +- if (type->class == HLSL_CLASS_STRUCT) +- return true; +- +- switch (type->base_type) ++ switch (type->class) + { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_DOUBLE: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_PIXELSHADER: +- case HLSL_TYPE_VERTEXSHADER: +- case HLSL_TYPE_STRING: ++ case HLSL_CLASS_STRUCT: ++ /* Note that the fields must all be numeric; this was validated in ++ * check_invalid_object_fields(). */ + return true; +- case HLSL_TYPE_TEXTURE: +- case HLSL_TYPE_SAMPLER: +- switch (type->sampler_dim) ++ ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ return true; ++ ++ case HLSL_CLASS_ARRAY: ++ return is_type_supported_fx_2(ctx, type->e.array.type, loc); ++ ++ case HLSL_CLASS_OBJECT: ++ switch (type->base_type) + { +- case HLSL_SAMPLER_DIM_1D: +- case HLSL_SAMPLER_DIM_2D: +- case HLSL_SAMPLER_DIM_3D: +- case HLSL_SAMPLER_DIM_CUBE: +- case HLSL_SAMPLER_DIM_GENERIC: +- return true; ++ case HLSL_TYPE_TEXTURE: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ case HLSL_SAMPLER_DIM_2D: ++ case HLSL_SAMPLER_DIM_3D: ++ case HLSL_SAMPLER_DIM_CUBE: ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return true; ++ default: ++ return false; ++ } ++ break; ++ ++ case HLSL_TYPE_SAMPLER: ++ case HLSL_TYPE_STRING: ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type); ++ return false; ++ + default: +- ; ++ return false; + } +- break; +- default: +- return false; + } + +- return false; ++ vkd3d_unreachable(); + } + + static void write_fx_2_parameters(struct fx_write_context *fx) +@@ -828,7 +832,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (!is_type_supported_fx_2(var->data_type)) ++ if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) + continue; + + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 5638a03a8f5..ed80e2b75c8 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -134,14 +134,39 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) + return hlsl_get_var(scope->upper, name); + } + +-void hlsl_free_var(struct hlsl_ir_var *decl) ++static void free_state_block_entry(struct hlsl_state_block_entry *entry) ++{ ++ vkd3d_free(entry->name); ++ vkd3d_free(entry->args); ++ hlsl_block_cleanup(entry->instrs); ++ vkd3d_free(entry->instrs); ++ vkd3d_free(entry); ++} ++ ++void hlsl_free_state_block(struct hlsl_state_block *state_block) + { + unsigned int k; + ++ assert(state_block); ++ for (k = 0; k < state_block->count; ++k) ++ free_state_block_entry(state_block->entries[k]); ++ vkd3d_free(state_block->entries); ++ vkd3d_free(state_block); ++} ++ ++void hlsl_free_var(struct hlsl_ir_var *decl) ++{ ++ unsigned int k, i; ++ + vkd3d_free((void *)decl->name); + hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); ++ ++ for (i = 0; i < decl->state_block_count; ++i) ++ hlsl_free_state_block(decl->state_blocks[i]); ++ vkd3d_free(decl->state_blocks); ++ + vkd3d_free(decl); + } + +@@ -1561,6 +1586,27 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned + return &swizzle->node; + } + ++struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, ++ struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_stateblock_constant *constant; ++ struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); ++ ++ if (!(constant = hlsl_alloc(ctx, sizeof(*constant)))) ++ return NULL; ++ ++ init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc); ++ ++ if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1))) ++ { ++ vkd3d_free(constant); ++ return NULL; ++ } ++ strcpy(constant->name, name); ++ ++ return &constant->node; ++} ++ + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) + { + struct hlsl_type *type = index->val.node->data_type; +@@ -1570,7 +1616,10 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) + + bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) + { +- return index->val.node->data_type->class == HLSL_CLASS_OBJECT; ++ const struct hlsl_type *type = index->val.node->data_type; ++ ++ return type->class == HLSL_CLASS_OBJECT ++ && (type->base_type == HLSL_TYPE_TEXTURE || type->base_type == HLSL_TYPE_UAV); + } + + bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) +@@ -1881,6 +1930,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr + return dst; + } + ++static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, ++ struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) ++{ ++ return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); ++} ++ + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) + { + hlsl_block_cleanup(&c->body); +@@ -1976,6 +2031,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + + case HLSL_IR_SWIZZLE: + return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); ++ ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); + } + + vkd3d_unreachable(); +@@ -2631,7 +2689,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + + [HLSL_OP3_CMP] = "cmp", + [HLSL_OP3_DP2ADD] = "dp2add", +- [HLSL_OP3_MOVC] = "movc", + [HLSL_OP3_TERNARY] = "ternary", + }; + +@@ -2808,6 +2865,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ + vkd3d_string_buffer_printf(buffer, "]"); + } + ++static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, ++ const struct hlsl_ir_stateblock_constant *constant) ++{ ++ vkd3d_string_buffer_printf(buffer, "%s", constant->name); ++} ++ + static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) + { + struct hlsl_ir_switch_case *c; +@@ -2896,6 +2959,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + case HLSL_IR_SWIZZLE: + dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); + break; ++ ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); ++ break; + } + } + +@@ -3068,6 +3135,12 @@ static void free_ir_index(struct hlsl_ir_index *index) + vkd3d_free(index); + } + ++static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) ++{ ++ vkd3d_free(constant->name); ++ vkd3d_free(constant); ++} ++ + void hlsl_free_instr(struct hlsl_ir_node *node) + { + assert(list_empty(&node->uses)); +@@ -3125,6 +3198,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + case HLSL_IR_SWITCH: + free_ir_switch(hlsl_ir_switch(node)); + break; ++ ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); ++ break; + } + } + +@@ -3290,7 +3367,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, + {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, + {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, ++ {"cs_5_1", VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false}, + {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, ++ {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false}, + {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, + {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, + {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, +@@ -3298,7 +3377,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, + {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, + {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, ++ {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false}, + {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, ++ {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false}, + {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, + {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, + {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, +@@ -3326,6 +3407,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, + {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, + {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, ++ {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false}, + {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, + {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, + {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, +@@ -3347,6 +3429,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, + {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, + {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, ++ {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false}, + }; + + for (i = 0; i < ARRAY_SIZE(profiles); ++i) +@@ -3651,6 +3734,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + + rb_destroy(&ctx->functions, free_function_rb, NULL); + ++ /* State blocks must be free before the variables, because they contain instructions that may ++ * refer to them. */ ++ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ for (i = 0; i < var->state_block_count; ++i) ++ hlsl_free_state_block(var->state_blocks[i]); ++ vkd3d_free(var->state_blocks); ++ var->state_blocks = NULL; ++ var->state_block_count = 0; ++ var->state_block_capacity = 0; ++ } ++ } ++ + LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index aa9cb14fc8d..4225098bc87 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -298,6 +298,7 @@ enum hlsl_ir_node_type + HLSL_IR_STORE, + HLSL_IR_SWIZZLE, + HLSL_IR_SWITCH, ++ HLSL_IR_STATEBLOCK_CONSTANT, + }; + + /* Common data for every type of IR instruction node. */ +@@ -423,6 +424,14 @@ struct hlsl_ir_var + /* Scope that contains annotations for this variable. */ + struct hlsl_scope *annotations; + ++ /* A dynamic array containing the state block on the variable's declaration, if any. ++ * An array variable may contain multiple state blocks. ++ * A technique pass will always contain one. ++ * These are only really used for effect profiles. */ ++ struct hlsl_state_block **state_blocks; ++ unsigned int state_block_count; ++ size_t state_block_capacity; ++ + /* Indexes of the IR instructions where the variable is first written and last read (liveness + * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 + * means function entry. */ +@@ -458,6 +467,38 @@ struct hlsl_ir_var + uint32_t is_separated_resource : 1; + }; + ++/* This struct is used to represent assignments in state block entries: ++ * name = {args[0], args[1], ...}; ++ * - or - ++ * name = args[0] ++ * - or - ++ * name[lhs_index] = args[0] ++ * - or - ++ * name[lhs_index] = {args[0], args[1], ...}; ++ */ ++struct hlsl_state_block_entry ++{ ++ /* For assignments, the name in the lhs. */ ++ char *name; ++ ++ /* Whether the lhs in the assignment is indexed and, in that case, its index. */ ++ bool lhs_has_index; ++ unsigned int lhs_index; ++ ++ /* Instructions present in the rhs. */ ++ struct hlsl_block *instrs; ++ ++ /* For assignments, arguments of the rhs initializer. */ ++ struct hlsl_ir_node **args; ++ unsigned int args_count; ++}; ++ ++struct hlsl_state_block ++{ ++ struct hlsl_state_block_entry **entries; ++ size_t count, capacity; ++}; ++ + /* Sized array of variables representing a function's parameters. */ + struct hlsl_func_parameters + { +@@ -601,14 +642,9 @@ enum hlsl_ir_expr_op + /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, + * then adds c. */ + HLSL_OP3_DP2ADD, +- /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. +- * TERNARY(a, b, c) returns c if a == 0 and b otherwise. +- * They differ for floating point numbers, because +- * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b +- if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while +- SM4+ is using MOVC in such cases. */ ++ /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. ++ * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ + HLSL_OP3_CMP, +- HLSL_OP3_MOVC, + HLSL_OP3_TERNARY, + }; + +@@ -754,6 +790,14 @@ struct hlsl_ir_constant + struct hlsl_reg reg; + }; + ++/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, ++ * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ ++struct hlsl_ir_stateblock_constant ++{ ++ struct hlsl_ir_node node; ++ char *name; ++}; ++ + struct hlsl_scope + { + /* Item entry for hlsl_ctx.scopes. */ +@@ -932,6 +976,16 @@ struct hlsl_ctx + bool warn_implicit_truncation; + }; + ++static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); ++} ++ ++static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return !hlsl_version_ge(ctx, major, minor); ++} ++ + struct hlsl_resource_load_params + { + struct hlsl_type *format; +@@ -1019,6 +1073,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n + return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); + } + ++static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) ++{ ++ assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); ++ return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); ++} ++ + static inline void hlsl_block_init(struct hlsl_block *block) + { + list_init(&block->instrs); +@@ -1211,6 +1271,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); + void hlsl_free_attribute(struct hlsl_attribute *attr); + void hlsl_free_instr(struct hlsl_ir_node *node); + void hlsl_free_instr_list(struct list *list); ++void hlsl_free_state_block(struct hlsl_state_block *state_block); + void hlsl_free_type(struct hlsl_type *type); + void hlsl_free_var(struct hlsl_ir_var *decl); + +@@ -1292,6 +1353,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + struct hlsl_struct_field *fields, size_t field_count); + struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, ++ struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index e02e0c540f9..8f71556757a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -77,6 +77,10 @@ struct parse_variable_def + struct hlsl_type *basic_type; + uint32_t modifiers; + struct vkd3d_shader_location modifiers_loc; ++ ++ struct hlsl_state_block **state_blocks; ++ unsigned int state_block_count; ++ size_t state_block_capacity; + }; + + struct parse_function +@@ -114,6 +118,12 @@ struct parse_attribute_list + const struct hlsl_attribute **attrs; + }; + ++struct state_block_index ++{ ++ bool has_index; ++ unsigned int index; ++}; ++ + } + + %code provides +@@ -931,24 +941,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) + vkd3d_free(v->arrays.sizes); + vkd3d_free(v->name); + hlsl_cleanup_semantic(&v->semantic); ++ assert(!v->state_blocks); + vkd3d_free(v); + } + +-static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) +-{ +- return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; +-} +- +-static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +-{ +- return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +-} +- +-static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +-{ +- return !shader_profile_version_ge(ctx, major, minor); +-} +- + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + struct hlsl_type *type, uint32_t modifiers, struct list *defs) + { +@@ -971,7 +967,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + + field->type = type; + +- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) ++ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) + { + for (k = 0; k < v->arrays.count; ++k) + unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); +@@ -1121,7 +1117,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + } + + static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, +- const struct vkd3d_shader_location *loc) ++ struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_var *var; + struct hlsl_type *type; +@@ -1131,6 +1127,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * + return false; + var->annotations = annotations; + ++ var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); ++ var->state_blocks[0] = state_block; ++ var->state_block_count = 1; ++ var->state_block_capacity = 1; ++ + if (!hlsl_add_var(ctx, var, false)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); +@@ -1216,7 +1217,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + +- if (shader_profile_version_lt(ctx, 4, 0)) ++ if (hlsl_version_lt(ctx, 4, 0)) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); +@@ -1299,6 +1300,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_STORE: + case HLSL_IR_SWITCH: ++ case HLSL_IR_STATEBLOCK_CONSTANT: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + } +@@ -2177,7 +2179,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + + type = basic_type; + +- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) ++ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) + { + for (i = 0; i < v->arrays.count; ++i) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); +@@ -2362,8 +2364,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + free_parse_variable_def(v); + continue; + } ++ + type = var->data_type; + ++ var->state_blocks = v->state_blocks; ++ var->state_block_count = v->state_block_count; ++ var->state_block_capacity = v->state_block_capacity; ++ v->state_block_count = 0; ++ v->state_block_capacity = 0; ++ v->state_blocks = NULL; ++ ++ if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u state blocks, but got %u.", ++ hlsl_type_component_count(type), var->state_block_count); ++ free_parse_variable_def(v); ++ continue; ++ } ++ + if (v->initializer.args_count) + { + if (v->initializer.braces) +@@ -2663,12 +2682,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, + static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_base_type base_type; + struct hlsl_type *type; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ base_type = type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; ++ type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); + + return convert_args(ctx, params, type, loc); + } +@@ -2728,81 +2749,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, + return write_acos_or_asin(ctx, params, loc, false); + } + +-static bool intrinsic_all(struct hlsl_ctx *ctx, +- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++/* Find the type corresponding to the given source type, with the same ++ * dimensions but a different base type. */ ++static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, ++ const struct hlsl_type *type, enum hlsl_base_type base_type) + { +- struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; ++ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); ++} ++ ++static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++ struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *res, *load; + unsigned int i, count; + +- if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, one); ++ count = hlsl_type_component_count(arg->data_type); + +- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) ++ if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) + return false; +- hlsl_block_add_instr(params->instrs, zero); + +- mul = one; +- +- count = hlsl_type_component_count(arg->data_type); +- for (i = 0; i < count; ++i) ++ for (i = 1; i < count; ++i) + { + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + +- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) +- return false; ++ if (!(res = hlsl_new_binary_expr(ctx, op, res, load))) ++ return NULL; ++ hlsl_block_add_instr(params->instrs, res); + } + +- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); ++ return true; + } + +-static bool intrinsic_any(struct hlsl_ctx *ctx, ++static bool intrinsic_all(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; +- unsigned int i, count; ++ struct hlsl_ir_node *arg = params->args[0], *cast; ++ struct hlsl_type *bool_type; + +- if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) +- { +- hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); ++ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); ++ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) + return false; +- } +- +- if (arg->data_type->base_type == HLSL_TYPE_FLOAT) +- { +- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, zero); +- +- if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) +- return false; + +- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); +- } +- else if (arg->data_type->base_type == HLSL_TYPE_BOOL) +- { +- if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, bfalse); +- +- or = bfalse; +- +- count = hlsl_type_component_count(arg->data_type); +- for (i = 0; i < count; ++i) +- { +- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) +- return false; ++ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); ++} + +- if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) +- return false; +- } ++static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++ const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg = params->args[0], *cast; ++ struct hlsl_type *bool_type; + +- return true; +- } ++ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); ++ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) ++ return false; + +- hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); +- return false; ++ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); + } + + static bool intrinsic_asin(struct hlsl_ctx *ctx, +@@ -2870,20 +2872,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, + type->name, type->name, type->name); + if (ret < 0) + { +- vkd3d_string_buffer_cleanup(buf); ++ hlsl_release_string_buffer(ctx, buf); + return false; + } + + ret = vkd3d_string_buffer_printf(buf, body_template, type->name); + if (ret < 0) + { +- vkd3d_string_buffer_cleanup(buf); ++ hlsl_release_string_buffer(ctx, buf); + return false; + } + + func = hlsl_compile_internal_function(ctx, + atan2_mode ? atan2_name : atan_name, buf->buffer); +- vkd3d_string_buffer_cleanup(buf); ++ hlsl_release_string_buffer(ctx, buf); + if (!func) + return false; + +@@ -2903,15 +2905,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, + return write_atan_or_atan2(ctx, params, loc, true); + } + +- +-/* Find the type corresponding to the given source type, with the same +- * dimensions but a different base type. */ +-static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, +- const struct hlsl_type *type, enum hlsl_base_type base_type) +-{ +- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +-} +- + static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3035,6 +3028,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); + } + ++static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_ir_node *arg; ++ const char *fn_name, *type_name; ++ char *body; ++ ++ static const char template[] = ++ "%s %s(%s x)\n" ++ "{\n" ++ " return (exp(x) %s exp(-x)) / 2;\n" ++ "}\n"; ++ static const char fn_name_sinh[] = "sinh"; ++ static const char fn_name_cosh[] = "cosh"; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ type_name = arg->data_type->name; ++ fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type_name, fn_name, type_name, sinh_mode ? "-" : "+"))) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, fn_name, body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ ++static bool intrinsic_cosh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return write_cosh_or_sinh(ctx, params, loc, false); ++} ++ + static bool intrinsic_cross(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3747,6 +3780,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); + } + ++static bool intrinsic_refract(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_type *r_type = params->args[0]->data_type; ++ struct hlsl_type *n_type = params->args[1]->data_type; ++ struct hlsl_type *i_type = params->args[2]->data_type; ++ struct hlsl_type *res_type, *idx_type, *scal_type; ++ struct parse_initializer mut_params; ++ struct hlsl_ir_function_decl *func; ++ enum hlsl_base_type base; ++ char *body; ++ ++ static const char template[] = ++ "%s refract(%s r, %s n, %s i)\n" ++ "{\n" ++ " %s d, t;\n" ++ " d = dot(r, n);\n" ++ " t = 1 - i.x * i.x * (1 - d * d);\n" ++ " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" ++ "}"; ++ ++ if (r_type->class == HLSL_CLASS_MATRIX ++ || n_type->class == HLSL_CLASS_MATRIX ++ || i_type->class == HLSL_CLASS_MATRIX) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); ++ return false; ++ } ++ ++ assert(params->args_count == 3); ++ mut_params = *params; ++ mut_params.args_count = 2; ++ if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) ++ return false; ++ ++ base = expr_common_base_type(res_type->base_type, i_type->base_type); ++ base = base == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; ++ res_type = convert_numeric_type(ctx, res_type, base); ++ idx_type = convert_numeric_type(ctx, i_type, base); ++ scal_type = hlsl_get_scalar_type(ctx, base); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, ++ res_type->name, idx_type->name, scal_type->name))) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, "refract", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_round(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3827,6 +3913,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); + } + ++static bool intrinsic_sinh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return write_cosh_or_sinh(ctx, params, loc, true); ++} ++ + /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ + static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +@@ -3899,6 +3991,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc); + } + ++static bool intrinsic_tanh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_ir_node *arg; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s tanh(%s x)\n" ++ "{\n" ++ " %s exp_pos, exp_neg;\n" ++ " exp_pos = exp(x);\n" ++ " exp_neg = exp(-x);\n" ++ " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" ++ "}\n"; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ type = arg->data_type; ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name))) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, "tanh", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) + { +@@ -3967,7 +4092,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + } + +- if (shader_profile_version_ge(ctx, 4, 0)) ++ if (hlsl_version_ge(ctx, 4, 0)) + { + unsigned int count = hlsl_sampler_dim_count(dim); + struct hlsl_ir_node *divisor; +@@ -4014,7 +4139,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + + initialize_var_components(ctx, params->instrs, var, &idx, coords); +- if (shader_profile_version_ge(ctx, 4, 0)) ++ if (hlsl_version_ge(ctx, 4, 0)) + { + if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) + return false; +@@ -4200,7 +4325,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + +- if (shader_profile_version_ge(ctx, 4, 0)) ++ if (hlsl_version_ge(ctx, 4, 0)) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); + + return true; +@@ -4231,6 +4356,7 @@ intrinsic_functions[] = + {"clamp", 3, true, intrinsic_clamp}, + {"clip", 1, true, intrinsic_clip}, + {"cos", 1, true, intrinsic_cos}, ++ {"cosh", 1, true, intrinsic_cosh}, + {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, +@@ -4262,15 +4388,18 @@ intrinsic_functions[] = + {"pow", 2, true, intrinsic_pow}, + {"radians", 1, true, intrinsic_radians}, + {"reflect", 2, true, intrinsic_reflect}, ++ {"refract", 3, true, intrinsic_refract}, + {"round", 1, true, intrinsic_round}, + {"rsqrt", 1, true, intrinsic_rsqrt}, + {"saturate", 1, true, intrinsic_saturate}, + {"sign", 1, true, intrinsic_sign}, + {"sin", 1, true, intrinsic_sin}, ++ {"sinh", 1, true, intrinsic_sinh}, + {"smoothstep", 3, true, intrinsic_smoothstep}, + {"sqrt", 1, true, intrinsic_sqrt}, + {"step", 2, true, intrinsic_step}, + {"tan", 1, true, intrinsic_tan}, ++ {"tanh", 1, true, intrinsic_tanh}, + {"tex1D", -1, false, intrinsic_tex1D}, + {"tex2D", -1, false, intrinsic_tex2D}, + {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, +@@ -4405,26 +4534,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + } +- else if (common_type->dimx == 1 && common_type->dimy == 1) +- { +- common_type = hlsl_get_numeric_type(ctx, cond_type->class, +- common_type->base_type, cond_type->dimx, cond_type->dimy); +- } +- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) ++ else + { +- /* This condition looks wrong but is correct. +- * floatN is compatible with float1xN, but not with floatNx1. */ +- +- struct vkd3d_string_buffer *cond_string, *value_string; ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, ++ cond_type->dimx, cond_type->dimy); ++ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) ++ return false; + +- cond_string = hlsl_type_to_string(ctx, cond_type); +- value_string = hlsl_type_to_string(ctx, common_type); +- if (cond_string && value_string) +- hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Ternary condition type '%s' is not compatible with value type '%s'.", +- cond_string->buffer, value_string->buffer); +- hlsl_release_string_buffer(ctx, cond_string); +- hlsl_release_string_buffer(ctx, value_string); ++ if (common_type->dimx == 1 && common_type->dimy == 1) ++ { ++ common_type = hlsl_get_numeric_type(ctx, cond_type->class, ++ common_type->base_type, cond_type->dimx, cond_type->dimy); ++ } ++ else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) ++ { ++ /* This condition looks wrong but is correct. ++ * floatN is compatible with float1xN, but not with floatNx1. */ ++ ++ struct vkd3d_string_buffer *cond_string, *value_string; ++ ++ cond_string = hlsl_type_to_string(ctx, cond_type); ++ value_string = hlsl_type_to_string(ctx, common_type); ++ if (cond_string && value_string) ++ hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Ternary condition type '%s' is not compatible with value type '%s'.", ++ cond_string->buffer, value_string->buffer); ++ hlsl_release_string_buffer(ctx, cond_string); ++ hlsl_release_string_buffer(ctx, value_string); ++ } + } + + if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) +@@ -4449,9 +4586,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + hlsl_release_string_buffer(ctx, second_string); + } + ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, ++ cond_type->dimx, cond_type->dimy); ++ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) ++ return false; ++ + common_type = first->data_type; + } + ++ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); ++ + args[0] = cond; + args[1] = first; + args[2] = second; +@@ -5280,6 +5424,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + hlsl_release_string_buffer(ctx, string); + } + ++static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) ++{ ++ if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, ++ sizeof(*state_block->entries))) ++ return false; ++ ++ state_block->entries[state_block->count++] = entry; ++ return true; ++} ++ + } + + %locations +@@ -5320,6 +5474,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct parse_attribute_list attr_list; + struct hlsl_ir_switch_case *switch_case; + struct hlsl_scope *scope; ++ struct hlsl_state_block *state_block; ++ struct state_block_index state_block_index; + } + + %token KW_BLENDSTATE +@@ -5519,6 +5675,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type any_identifier + %type var_identifier ++%type stateblock_lhs_identifier + %type name_opt + + %type parameter +@@ -5533,6 +5690,10 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type semantic + ++%type state_block ++ ++%type state_block_index_opt ++ + %type switch_case + + %type field_type +@@ -5543,6 +5704,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %type type_no_void + %type typedef_type + ++%type state_block_list + %type type_spec + %type variable_decl + %type variable_def +@@ -5573,9 +5735,9 @@ name_opt: + | any_identifier + + pass: +- KW_PASS name_opt annotations_opt '{' '}' ++ KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' + { +- if (!add_pass(ctx, $2, $3, &@1)) ++ if (!add_pass(ctx, $2, $3, $6, &@1)) + YYABORT; + } + +@@ -6474,7 +6636,7 @@ type_no_void: + { + validate_texture_format_type(ctx, $3, &@3); + +- if (shader_profile_version_lt(ctx, 4, 1)) ++ if (hlsl_version_lt(ctx, 4, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); +@@ -6513,7 +6675,7 @@ type_no_void: + $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); + if ($$->is_minimum_precision) + { +- if (shader_profile_version_lt(ctx, 4, 0)) ++ if (hlsl_version_lt(ctx, 4, 0)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support minimum-precision types."); +@@ -6677,22 +6839,91 @@ variable_decl: + $$->reg_reservation = $3.reg_reservation; + } + +-state: +- any_identifier '=' expr ';' ++state_block_start: ++ %empty + { +- vkd3d_free($1); +- destroy_block($3); ++ ctx->in_state_block = 1; + } + +-state_block_start: ++stateblock_lhs_identifier: ++ any_identifier ++ { ++ $$ = $1; ++ } ++ | KW_PIXELSHADER ++ { ++ if (!($$ = hlsl_strdup(ctx, "pixelshader"))) ++ YYABORT; ++ } ++ | KW_VERTEXSHADER ++ { ++ if (!($$ = hlsl_strdup(ctx, "vertexshader"))) ++ YYABORT; ++ } ++ ++state_block_index_opt: + %empty + { +- ctx->in_state_block = 1; ++ $$.has_index = false; ++ $$.index = 0; + } ++ | '[' C_INTEGER ']' ++ { ++ if ($2 < 0) ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, ++ "State block array index is not a positive integer constant."); ++ YYABORT; ++ } ++ $$.has_index = true; ++ $$.index = $2; ++ } + + state_block: + %empty +- | state_block state ++ { ++ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) ++ YYABORT; ++ } ++ | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' ++ { ++ struct hlsl_state_block_entry *entry; ++ ++ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) ++ YYABORT; ++ ++ entry->name = $2; ++ entry->lhs_has_index = $3.has_index; ++ entry->lhs_index = $3.index; ++ ++ entry->instrs = $5.instrs; ++ entry->args = $5.args; ++ entry->args_count = $5.args_count; ++ ++ $$ = $1; ++ state_block_add_entry($$, entry); ++ } ++ ++state_block_list: ++ '{' state_block '}' ++ { ++ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) ++ YYABORT; ++ ++ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, ++ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) ++ YYABORT; ++ $$->state_blocks[$$->state_block_count++] = $2; ++ } ++ | state_block_list ',' '{' state_block '}' ++ { ++ $$ = $1; ++ ++ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, ++ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) ++ YYABORT; ++ $$->state_blocks[$$->state_block_count++] = $4; ++ } + + variable_def: + variable_decl +@@ -6705,6 +6936,24 @@ variable_def: + { + $$ = $1; + ctx->in_state_block = 0; ++ ++ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, ++ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) ++ YYABORT; ++ $$->state_blocks[$$->state_block_count++] = $4; ++ } ++ | variable_decl '{' state_block_start state_block_list '}' ++ { ++ $$ = $1; ++ ctx->in_state_block = 0; ++ ++ $$->state_blocks = $4->state_blocks; ++ $$->state_block_count = $4->state_block_count; ++ $$->state_block_capacity = $4->state_block_capacity; ++ $4->state_blocks = NULL; ++ $4->state_block_count = 0; ++ $4->state_block_capacity = 0; ++ free_parse_variable_def($4); + } + + variable_def_typed: +@@ -7330,15 +7579,13 @@ primary_expr: + { + if (ctx->in_state_block) + { +- struct hlsl_ir_load *load; +- struct hlsl_ir_var *var; ++ struct hlsl_ir_node *constant; + +- if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", +- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) ++ if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1))) + YYABORT; +- if (!(load = hlsl_new_var_load(ctx, var, &@1))) +- YYABORT; +- if (!($$ = make_block(ctx, &load->node))) ++ vkd3d_free($1); ++ ++ if (!($$ = make_block(ctx, constant))) + YYABORT; + } + else +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 6f2de93767b..eaa72836d8a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -1565,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, + var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), + new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); + +- if (instr->data_type->class != HLSL_CLASS_OBJECT) ++ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) + { + struct hlsl_ir_node *swizzle_node; + +@@ -1742,7 +1742,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s + { + unsigned int writemask = store->writemask; + +- if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) ++ if (!hlsl_is_numeric_type(store->rhs.node->data_type)) + writemask = VKD3DSP_WRITEMASK_0; + copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); + } +@@ -2954,12 +2954,11 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st + return true; + } + +-/* Use movc/cmp for the ternary operator. */ ++/* Lower TERNARY to CMP for SM1. */ + static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; +- struct hlsl_ir_node *zero, *cond, *first, *second; +- struct hlsl_constant_value zero_value = { 0 }; ++ struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; + struct hlsl_ir_expr *expr; + struct hlsl_type *type; + +@@ -2980,48 +2979,25 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return false; + } + +- if (ctx->profile->major_version < 4) +- { +- struct hlsl_ir_node *abs, *neg; ++ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); + +- if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, abs); ++ type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, ++ instr->data_type->dimx, instr->data_type->dimy); + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, neg); +- +- operands[0] = neg; +- operands[1] = second; +- operands[2] = first; +- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) +- return false; +- } +- else +- { +- if (cond->data_type->base_type == HLSL_TYPE_FLOAT) +- { +- if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, zero); ++ if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, float_cond); + +- operands[0] = zero; +- operands[1] = cond; +- type = cond->data_type; +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); +- if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, cond); +- } ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); + +- memset(operands, 0, sizeof(operands)); +- operands[0] = cond; +- operands[1] = first; +- operands[2] = second; +- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) +- return false; +- } ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = neg; ++ operands[1] = second; ++ operands[2] = first; ++ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) ++ return false; + + hlsl_block_add_instr(block, replacement); + return true; +@@ -3319,11 +3295,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) + { ++ struct hlsl_type *cond_type = condition->data_type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_ir_node *cond; + + assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + ++ if (cond_type->base_type != HLSL_TYPE_BOOL) ++ { ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); ++ ++ if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) ++ return NULL; ++ hlsl_block_add_instr(instrs, condition); ++ } ++ + operands[0] = condition; + operands[1] = if_true; + operands[2] = if_false; +@@ -3760,6 +3746,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: + break; ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ /* Stateblock constants should not appear in the shader program. */ ++ vkd3d_unreachable(); + } + + return false; +@@ -3848,6 +3837,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + } + } + ++static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read) ++{ ++ unsigned int i; ++ ++ if (hlsl_deref_is_lowered(deref)) ++ { ++ if (deref->rel_offset.node) ++ deref->rel_offset.node->last_read = last_read; ++ } ++ else ++ { ++ for (i = 0; i < deref->path_len; ++i) ++ deref->path[i].node->last_read = last_read; ++ } ++} ++ + /* Compute the earliest and latest liveness for each variable. In the case that + * a variable is accessed inside of a loop, we promote its liveness to extend + * to at least the range of the entire loop. We also do this for nodes, so that +@@ -3867,6 +3872,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + case HLSL_IR_CALL: + /* We should have inlined all calls before computing liveness. */ + vkd3d_unreachable(); ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ /* Stateblock constants should not appear in the shader program. */ ++ vkd3d_unreachable(); + + case HLSL_IR_STORE: + { +@@ -3876,8 +3884,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + if (!var->first_write) + var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; + store->rhs.node->last_read = last_read; +- if (store->lhs.rel_offset.node) +- store->lhs.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&store->lhs, last_read); + break; + } + case HLSL_IR_EXPR: +@@ -3904,8 +3911,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + var = load->src.var; + var->last_read = max(var->last_read, last_read); +- if (load->src.rel_offset.node) +- load->src.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&load->src, last_read); + break; + } + case HLSL_IR_LOOP: +@@ -3922,14 +3928,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + var = load->resource.var; + var->last_read = max(var->last_read, last_read); +- if (load->resource.rel_offset.node) +- load->resource.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&load->resource, last_read); + + if ((var = load->sampler.var)) + { + var->last_read = max(var->last_read, last_read); +- if (load->sampler.rel_offset.node) +- load->sampler.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&load->sampler, last_read); + } + + if (load->coords.node) +@@ -3954,8 +3958,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + var = store->resource.var; + var->last_read = max(var->last_read, last_read); +- if (store->resource.rel_offset.node) +- store->resource.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&store->resource, last_read); + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; + break; +@@ -4790,7 +4793,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) + continue; + + if (var1->reg_reservation.offset_type +- || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) ++ || var1->reg_reservation.reg_type == 's' ++ || var1->reg_reservation.reg_type == 't' ++ || var1->reg_reservation.reg_type == 'u') + buffer->manually_packed_elements = true; + else + buffer->automatically_packed_elements = true; +@@ -5400,11 +5405,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + + lower_ir(ctx, lower_narrowing_casts, body); +- lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + lower_ir(ctx, lower_int_division, body); + lower_ir(ctx, lower_int_modulus, body); + lower_ir(ctx, lower_int_abs, body); ++ lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_float_modulus, body); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + do +@@ -5420,13 +5425,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + remove_unreachable_code(ctx, body); + hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); + +- if (profile-> major_version < 4) +- { +- lower_ir(ctx, lower_nonfloat_exprs, body); +- /* Constants casted to float must be folded. */ +- hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); +- } +- + lower_ir(ctx, lower_nonconstant_vector_derefs, body); + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); +@@ -5438,9 +5436,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + sort_synthetic_separated_samplers_first(ctx); + +- lower_ir(ctx, lower_ternary, body); + if (profile->major_version < 4) + { ++ lower_ir(ctx, lower_ternary, body); ++ ++ lower_ir(ctx, lower_nonfloat_exprs, body); ++ /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */ ++ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ lower_ir(ctx, lower_casts_to_bool, body); ++ + lower_ir(ctx, lower_casts_to_int, body); + lower_ir(ctx, lower_division, body); + lower_ir(ctx, lower_sqrt, body); +@@ -5463,6 +5467,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + + lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); + ++ do ++ compute_liveness(ctx, entry_func); ++ while (hlsl_transform_ir(ctx, dce, body, NULL)); ++ + /* TODO: move forward, remove when no longer needed */ + transform_derefs(ctx, replace_deref_path_with_offset, body); + while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index b76b1fce507..4cea98e9286 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + assert(dst_type->base_type == src2->node.data_type->base_type); + assert(dst_type->base_type == src3->node.data_type->base_type); ++ assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL); + + for (k = 0; k < dst_type->dimx; ++k) +- { +- switch (src1->node.data_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k]; +- break; +- +- case HLSL_TYPE_DOUBLE: +- dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k]; +- break; ++ dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; + +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: +- dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; +- break; +- +- default: +- vkd3d_unreachable(); +- } +- } + return true; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 8af537390f9..610d907d981 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -17,6 +17,7 @@ + */ + + #include "vkd3d_shader_private.h" ++#include "vkd3d_types.h" + + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) + { +@@ -56,19 +57,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i + vsir_instruction_init(ins, &location, VKD3DSIH_NOP); + } + +-static void remove_dcl_temps(struct vsir_program *program) +-{ +- unsigned int i; +- +- for (i = 0; i < program->instructions.count; ++i) +- { +- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; +- +- if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) +- vkd3d_shader_instruction_make_nop(ins); +- } +-} +- + static bool vsir_instruction_init_with_params(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) +@@ -94,85 +82,163 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, + return true; + } + +-static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) ++static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, ++ struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) + { ++ const unsigned int components_read = 3 + (program->shader_version.major >= 2); + struct vkd3d_shader_instruction_array *instructions = &program->instructions; +- struct vkd3d_shader_instruction *texkill_ins, *ins; +- unsigned int components_read = 3 + (program->shader_version.major >= 2); +- unsigned int tmp_idx = ~0u; +- unsigned int i, k; +- +- for (i = 0; i < instructions->count; ++i) +- { +- texkill_ins = &instructions->elements[i]; ++ size_t pos = texkill - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int j; + +- if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) +- continue; ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ if (*tmp_idx == ~0u) ++ *tmp_idx = program->temp_count++; + +- if (tmp_idx == ~0u) +- tmp_idx = program->temp_count++; ++ /* tmp = ins->dst[0] < 0 */ + +- /* tmp = ins->dst[0] < 0 */ ++ ins = &instructions->elements[pos + 1]; ++ if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- ins = &instructions->elements[i + 1]; +- if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].reg.idx[0].offset = *tmp_idx; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; ++ ++ ins->src[0].reg = texkill->dst[0].reg; ++ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].reg.u.immconst_f32[0] = 0.0f; ++ ins->src[1].reg.u.immconst_f32[1] = 0.0f; ++ ins->src[1].reg.u.immconst_f32[2] = 0.0f; ++ ins->src[1].reg.u.immconst_f32[3] = 0.0f; ++ ++ /* tmp.x = tmp.x || tmp.y */ ++ /* tmp.x = tmp.x || tmp.z */ ++ /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ ++ ++ for (j = 1; j < components_read; ++j) ++ { ++ ins = &instructions->elements[pos + 1 + j]; ++ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->dst[0].reg.idx[0].offset = tmp_idx; +- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; ++ ins->dst[0].reg.idx[0].offset = *tmp_idx; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + +- ins->src[0].reg = texkill_ins->dst[0].reg; +- vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].reg.idx[0].offset = *tmp_idx; ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[1].reg.u.immconst_f32[0] = 0.0f; +- ins->src[1].reg.u.immconst_f32[1] = 0.0f; +- ins->src[1].reg.u.immconst_f32[2] = 0.0f; +- ins->src[1].reg.u.immconst_f32[3] = 0.0f; ++ ins->src[1].reg.idx[0].offset = *tmp_idx; ++ ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); ++ } + +- /* tmp.x = tmp.x || tmp.y */ +- /* tmp.x = tmp.x || tmp.z */ +- /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ ++ /* discard_nz tmp.x */ + +- for (k = 1; k < components_read; ++k) +- { +- ins = &instructions->elements[i + 1 + k]; +- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &instructions->elements[pos + 1 + components_read]; ++ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + +- vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->dst[0].reg.idx[0].offset = tmp_idx; +- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; +- +- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[0].reg.idx[0].offset = tmp_idx; +- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +- vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[1].reg.idx[0].offset = tmp_idx; +- ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); +- } ++ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].reg.idx[0].offset = *tmp_idx; ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + +- /* discard_nz tmp.x */ ++ /* Make the original instruction no-op */ ++ vkd3d_shader_instruction_make_nop(texkill); + +- ins = &instructions->elements[i + 1 + components_read]; +- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) +- return VKD3D_ERROR_OUT_OF_MEMORY; +- ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ return VKD3D_OK; ++} + +- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[0].reg.idx[0].offset = tmp_idx; +- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++/* The Shader Model 5 Assembly documentation states: "If components of a mad ++ * instruction are tagged as precise, the hardware must execute a mad instruction ++ * or the exact equivalent, and it cannot split it into a multiply followed by an add." ++ * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is ++ * not fused for "precise" operations." ++ * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ ++static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, ++ struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ struct vkd3d_shader_instruction *mul_ins, *add_ins; ++ size_t pos = mad - instructions->elements; ++ struct vkd3d_shader_dst_param *mul_dst; ++ ++ if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) ++ return VKD3D_OK; ++ ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (*tmp_idx == ~0u) ++ *tmp_idx = program->temp_count++; ++ ++ mul_ins = &instructions->elements[pos]; ++ add_ins = &instructions->elements[pos + 1]; ++ ++ mul_ins->handler_idx = VKD3DSIH_MUL; ++ mul_ins->src_count = 2; ++ ++ if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; ++ ++ mul_dst = mul_ins->dst; ++ *add_ins->dst = *mul_dst; ++ ++ mul_dst->modifiers = 0; ++ vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); ++ mul_dst->reg.dimension = add_ins->dst->reg.dimension; ++ mul_dst->reg.idx[0].offset = *tmp_idx; ++ ++ add_ins->src[0].reg = mul_dst->reg; ++ add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); ++ add_ins->src[0].modifiers = 0; ++ add_ins->src[1] = mul_ins->src[2]; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ unsigned int tmp_idx = ~0u, i; ++ enum vkd3d_result ret; ++ ++ for (i = 0; i < instructions->count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &instructions->elements[i]; ++ ++ switch (ins->handler_idx) ++ { ++ case VKD3DSIH_TEXKILL: ++ if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) ++ return ret; ++ break; + +- /* Make the original instruction no-op */ +- vkd3d_shader_instruction_make_nop(texkill_ins); ++ case VKD3DSIH_MAD: ++ if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) ++ return ret; ++ break; ++ ++ case VKD3DSIH_DCL_TEMPS: ++ vkd3d_shader_instruction_make_nop(ins); ++ break; ++ ++ default: ++ break; ++ } + } + + return VKD3D_OK; +@@ -2577,97 +2643,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) + } + } + +- /* Second subpass: creating new blocks might have broken +- * references in PHI instructions, so we use the block map to fix +- * them. */ +- current_label = 0; +- for (i = 0; i < ins_count; ++i) +- { +- struct vkd3d_shader_instruction *ins = &instructions[i]; +- struct vkd3d_shader_src_param *new_src; +- unsigned int j, l, new_src_count = 0; +- +- switch (ins->handler_idx) +- { +- case VKD3DSIH_LABEL: +- current_label = label_from_src_param(&ins->src[0]); +- continue; +- +- case VKD3DSIH_PHI: +- break; +- +- default: +- continue; +- } +- +- /* First count how many source parameters we need. */ +- for (j = 0; j < ins->src_count; j += 2) +- { +- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); +- size_t k, match_count = 0; +- +- for (k = 0; k < map_count; ++k) +- { +- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; +- +- if (mapping->switch_label == source_label && mapping->target_label == current_label) +- match_count += 1; +- } +- +- new_src_count += (match_count != 0) ? 2 * match_count : 2; +- } +- +- assert(new_src_count >= ins->src_count); +- +- /* Allocate more source parameters if needed. */ +- if (new_src_count == ins->src_count) +- { +- new_src = ins->src; +- } +- else +- { +- if (!(new_src = vsir_program_get_src_params(program, new_src_count))) +- { +- ERR("Failed to allocate %u source parameters.\n", new_src_count); +- goto fail; +- } +- } +- +- /* Then do the copy. */ +- for (j = 0, l = 0; j < ins->src_count; j += 2) +- { +- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); +- size_t k, match_count = 0; +- +- for (k = 0; k < map_count; ++k) +- { +- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; +- +- if (mapping->switch_label == source_label && mapping->target_label == current_label) +- { +- match_count += 1; +- +- new_src[l] = ins->src[j]; +- new_src[l + 1] = ins->src[j + 1]; +- new_src[l + 1].reg.idx[0].offset = mapping->if_label; +- l += 2; +- } +- } +- +- if (match_count == 0) +- { +- new_src[l] = ins->src[j]; +- new_src[l + 1] = ins->src[j + 1]; +- l += 2; +- } +- } +- +- assert(l == new_src_count); +- +- ins->src_count = new_src_count; +- ins->src = new_src; +- } +- + vkd3d_free(program->instructions.elements); + vkd3d_free(block_map); + program->instructions.elements = instructions; +@@ -2685,148 +2660,139 @@ fail: + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, +- struct vkd3d_shader_src_param *src); ++struct ssas_to_temps_alloc ++{ ++ unsigned int *table; ++ unsigned int next_temp_idx; ++}; + +-/* This is idempotent: it can be safely applied more than once on the +- * same register. */ +-static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) ++static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) + { +- unsigned int i; ++ size_t i = ssa_count * sizeof(*alloc->table); + +- if (reg->type == VKD3DSPR_SSA) ++ if (!(alloc->table = vkd3d_malloc(i))) + { +- reg->type = VKD3DSPR_TEMP; +- reg->idx[0].offset += program->temp_count; ++ ERR("Failed to allocate SSA table.\n"); ++ return false; + } ++ memset(alloc->table, 0xff, i); + +- for (i = 0; i < reg->idx_count; ++i) +- if (reg->idx[i].rel_addr) +- materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); +-} +- +-static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, +- struct vkd3d_shader_dst_param *dst) +-{ +- materialize_ssas_to_temps_process_reg(program, &dst->reg); +-} +- +-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, +- struct vkd3d_shader_src_param *src) +-{ +- materialize_ssas_to_temps_process_reg(program, &src->reg); ++ alloc->next_temp_idx = temp_count; ++ return true; + } + +-static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, +- unsigned int label) ++/* This is idempotent: it can be safely applied more than once on the ++ * same register. */ ++static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, ++ struct vkd3d_shader_register *reg) + { + unsigned int i; + +- assert(ins->handler_idx == VKD3DSIH_PHI); +- +- for (i = 0; i < ins->src_count; i += 2) ++ if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) + { +- if (label_from_src_param(&ins->src[i + 1]) == label) +- return &ins->src[i]; ++ reg->type = VKD3DSPR_TEMP; ++ reg->idx[0].offset = alloc->table[reg->idx[0].offset]; + } + +- vkd3d_unreachable(); ++ for (i = 0; i < reg->idx_count; ++i) ++ if (reg->idx[i].rel_addr) ++ materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg); + } + +-static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, +- struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, +- const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, +- const struct vkd3d_shader_src_param *source, bool invert) ++struct ssas_to_temps_block_info + { +- struct vkd3d_shader_src_param *src; +- struct vkd3d_shader_dst_param *dst; +- +- if (!vsir_instruction_init_with_params(program, instruction, loc, +- cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) +- return false; +- +- dst = instruction->dst; +- src = instruction->src; +- +- dst[0] = *dest; +- materialize_ssas_to_temps_process_dst_param(program, &dst[0]); ++ struct phi_incoming_to_temp ++ { ++ struct vkd3d_shader_src_param *src; ++ struct vkd3d_shader_dst_param *dst; ++ } *incomings; ++ size_t incoming_capacity; ++ size_t incoming_count; ++}; + +- assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); +- assert(dst[0].modifiers == 0); +- assert(dst[0].shift == 0); ++static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, ++ size_t count) ++{ ++ size_t i; + +- if (cond) +- { +- src[0] = *cond; +- src[1 + invert] = *source; +- memset(&src[2 - invert], 0, sizeof(src[2 - invert])); +- src[2 - invert].reg = dst[0].reg; +- materialize_ssas_to_temps_process_src_param(program, &src[1]); +- materialize_ssas_to_temps_process_src_param(program, &src[2]); +- } +- else +- { +- src[0] = *source; +- materialize_ssas_to_temps_process_src_param(program, &src[0]); +- } ++ for (i = 0; i < count; ++i) ++ vkd3d_free(block_info[i].incomings); + +- return true; ++ vkd3d_free(block_info); + } + +-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) ++static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) + { ++ size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; ++ struct ssas_to_temps_block_info *info, *block_info = NULL; + struct vkd3d_shader_instruction *instructions = NULL; +- struct materialize_ssas_to_temps_block_data +- { +- size_t phi_begin; +- size_t phi_count; +- } *block_index = NULL; +- size_t ins_capacity = 0, ins_count = 0, i; ++ struct ssas_to_temps_alloc alloc = {0}; + unsigned int current_label = 0; + +- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) +- goto fail; +- +- if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) ++ if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) + { +- ERR("Failed to allocate block index.\n"); ++ ERR("Failed to allocate block info array.\n"); + goto fail; + } + +- for (i = 0; i < program->instructions.count; ++i) ++ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) ++ goto fail; ++ ++ for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ unsigned int j, temp_idx; + +- switch (ins->handler_idx) ++ /* Only phi src/dst SSA values need be converted here. Structurisation may ++ * introduce new cases of undominated SSA use, which will be handled later. */ ++ if (ins->handler_idx != VKD3DSIH_PHI) ++ continue; ++ ++phi_count; ++ ++ temp_idx = alloc.next_temp_idx++; ++ ++ for (j = 0; j < ins->src_count; j += 2) + { +- case VKD3DSIH_LABEL: +- current_label = label_from_src_param(&ins->src[0]); +- break; ++ struct phi_incoming_to_temp *incoming; ++ unsigned int label; + +- case VKD3DSIH_PHI: +- assert(current_label != 0); +- assert(i != 0); +- if (block_index[current_label - 1].phi_begin == 0) +- block_index[current_label - 1].phi_begin = i; +- block_index[current_label - 1].phi_count += 1; +- break; ++ label = label_from_src_param(&ins->src[j + 1]); ++ assert(label); + +- default: +- current_label = 0; +- break; ++ info = &block_info[label - 1]; ++ ++ if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, ++ sizeof(*info->incomings)))) ++ goto fail; ++ ++ incoming = &info->incomings[info->incoming_count++]; ++ incoming->src = &ins->src[j]; ++ incoming->dst = ins->dst; ++ ++ alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; ++ ++ ++incoming_count; + } ++ ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); + } + ++ if (!phi_count) ++ goto done; ++ ++ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) ++ goto fail; ++ + for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; + size_t j; + + for (j = 0; j < ins->dst_count; ++j) +- materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + + for (j = 0; j < ins->src_count; ++j) +- materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + + switch (ins->handler_idx) + { +@@ -2835,65 +2801,21 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog + break; + + case VKD3DSIH_BRANCH: +- { +- if (vsir_register_is_label(&ins->src[0].reg)) +- { +- const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; +- +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) +- goto fail; +- +- for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) +- { +- const struct vkd3d_shader_src_param *source; +- +- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], +- current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], +- &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) +- goto fail; ++ case VKD3DSIH_SWITCH_MONOLITHIC: ++ info = &block_info[current_label - 1]; + +- ++ins_count; +- } +- } +- else ++ for (j = 0; j < info->incoming_count; ++j) + { +- struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], +- *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; +- const struct vkd3d_shader_src_param *cond = &ins->src[0]; ++ struct phi_incoming_to_temp *incoming = &info->incomings[j]; + +- if (!reserve_instructions(&instructions, &ins_capacity, +- ins_count + data_true->phi_count + data_false->phi_count)) ++ mov_ins = &instructions[ins_count++]; ++ if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0)) + goto fail; +- +- for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) +- { +- const struct vkd3d_shader_src_param *source; +- +- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], +- current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], +- &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) +- goto fail; +- +- ++ins_count; +- } +- +- for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) +- { +- const struct vkd3d_shader_src_param *source; +- +- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], +- current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], +- &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) +- goto fail; +- +- ++ins_count; +- } ++ *mov_ins->dst = *incoming->dst; ++ mov_ins->src = incoming->src; ++ mov_ins->src_count = 1; + } + break; +- } + + case VKD3DSIH_PHI: + continue; +@@ -2902,25 +2824,24 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog + break; + } + +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) +- goto fail; +- + instructions[ins_count++] = *ins; + } + + vkd3d_free(program->instructions.elements); +- vkd3d_free(block_index); + program->instructions.elements = instructions; + program->instructions.capacity = ins_capacity; + program->instructions.count = ins_count; +- program->temp_count += program->ssa_count; +- program->ssa_count = 0; ++ program->temp_count = alloc.next_temp_idx; ++done: ++ ssas_to_temps_block_info_cleanup(block_info, program->block_count); ++ vkd3d_free(alloc.table); + + return VKD3D_OK; + + fail: + vkd3d_free(instructions); +- vkd3d_free(block_index); ++ ssas_to_temps_block_info_cleanup(block_info, program->block_count); ++ vkd3d_free(alloc.table); + + return VKD3D_ERROR_OUT_OF_MEMORY; + } +@@ -3061,19 +2982,19 @@ struct vsir_cfg_structure + union + { + struct vsir_block *block; +- struct ++ struct vsir_cfg_structure_loop + { + struct vsir_cfg_structure_list body; + unsigned idx; + } loop; +- struct ++ struct vsir_cfg_structure_selection + { + struct vkd3d_shader_src_param *condition; + struct vsir_cfg_structure_list if_body; + struct vsir_cfg_structure_list else_body; + bool invert_condition; + } selection; +- struct ++ struct vsir_cfg_structure_jump + { + enum vsir_cfg_jump_type + { +@@ -3157,6 +3078,14 @@ static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) + } + } + ++struct vsir_cfg_emit_target ++{ ++ struct vkd3d_shader_instruction *instructions; ++ size_t ins_capacity, ins_count; ++ unsigned int jump_target_temp_idx; ++ unsigned int temp_count; ++}; ++ + struct vsir_cfg + { + struct vkd3d_shader_message_context *message_context; +@@ -3200,15 +3129,15 @@ struct vsir_cfg + * block), but we still try to keep `begin' as forward as + * possible, to keep the loop scope as small as possible. */ + bool synthetic; ++ /* The number of jump instructions (both conditional and ++ * unconditional) that target this loop. */ ++ unsigned int target_count; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; + + struct vsir_cfg_structure_list structured_program; + +- struct vkd3d_shader_instruction *instructions; +- size_t ins_capacity, ins_count; +- unsigned int jump_target_temp_idx; +- unsigned int temp_count; ++ struct vsir_cfg_emit_target *target; + }; + + static void vsir_cfg_cleanup(struct vsir_cfg *cfg) +@@ -3248,6 +3177,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign + interval->begin = begin; + interval->end = end; + interval->synthetic = synthetic; ++ interval->target_count = 0; + + return VKD3D_OK; + } +@@ -3402,7 +3332,7 @@ static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) + } + + static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, +- struct vkd3d_shader_message_context *message_context) ++ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) + { + struct vsir_block *current_block = NULL; + enum vkd3d_result ret; +@@ -3412,6 +3342,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + cfg->message_context = message_context; + cfg->program = program; + cfg->block_count = program->block_count; ++ cfg->target = target; + + vsir_block_list_init(&cfg->order); + +@@ -4250,53 +4181,157 @@ fail: + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +-static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target) ++static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list, unsigned int target) + { + struct vsir_cfg_structure *last = &list->structures[list->count - 1]; + + if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE + && !last->u.jump.condition && last->u.jump.target == target) ++ { + --list->count; ++ assert(cfg->loop_intervals[target].target_count > 0); ++ --cfg->loop_intervals[target].target_count; ++ } + } + +-static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structure_list *list) ++static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list) + { +- enum vkd3d_result ret; +- size_t i; ++ struct vsir_cfg_structure *structure; ++ size_t count = list->count; + +- for (i = 0; i < list->count; ++i) +- { +- struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; ++ if (count == 0) ++ return NULL; + +- if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) +- continue; ++ structure = &list->structures[count - 1]; + +- vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); +- new_selection.u.selection.condition = structure->u.jump.condition; +- new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; ++ if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK ++ || structure->u.jump.condition) ++ return NULL; + +- if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, +- STRUCTURE_TYPE_JUMP))) +- return VKD3D_ERROR_OUT_OF_MEMORY; +- new_jump->u.jump.type = structure->u.jump.type; +- new_jump->u.jump.target = structure->u.jump.target; ++ return structure; ++} + +- /* Move the rest of the structure list in the else branch +- * rather than leaving it after the selection construct. The +- * reason is that this is more conducive to further +- * optimization, because all the conditional `break's appear +- * as the last instruction of a branch of a cascade of +- * selection constructs at the end of the structure list we're +- * processing, instead of being buried in the middle of the +- * structure list itself. */ +- if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, +- &list->structures[i + 1], list->count - i - 1)) < 0) +- return ret; ++/* When the last instruction in both branches of a selection construct ++ * is an unconditional break, any of them can be moved after the ++ * selection construct. If they break the same loop both of them can ++ * be moved out, otherwise we can choose which one: we choose the one ++ * that breaks the innermost loop, because we hope to eventually ++ * remove the loop itself. ++ * ++ * In principle a similar movement could be done when the last ++ * instructions are continue and continue, or continue and break. But ++ * in practice I don't think those situations can happen given the ++ * previous passes we do on the program, so we don't care. */ ++static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list) ++{ ++ struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break; ++ unsigned int if_target, else_target, max_target; ++ size_t pos = list->count - 1; ++ ++ selection = &list->structures[pos]; ++ assert(selection->type == STRUCTURE_TYPE_SELECTION); ++ ++ if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); ++ else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); ++ ++ if (!if_break || !else_break) ++ return VKD3D_OK; ++ ++ if_target = if_break->u.jump.target; ++ else_target = else_break->u.jump.target; ++ max_target = max(if_target, else_target); ++ ++ if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ new_break->u.jump.type = JUMP_BREAK; ++ new_break->u.jump.target = max_target; ++ ++cfg->loop_intervals[max_target].target_count; ++ ++ /* Pointer `selection' could have been invalidated by the append ++ * operation. */ ++ selection = &list->structures[pos]; ++ assert(selection->type == STRUCTURE_TYPE_SELECTION); ++ ++ if (if_target == max_target) ++ { ++ --selection->u.selection.if_body.count; ++ assert(cfg->loop_intervals[if_target].target_count > 0); ++ --cfg->loop_intervals[if_target].target_count; ++ } ++ ++ if (else_target == max_target) ++ { ++ --selection->u.selection.else_body.count; ++ assert(cfg->loop_intervals[else_target].target_count > 0); ++ --cfg->loop_intervals[else_target].target_count; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list) ++{ ++ struct vsir_cfg_structure *trailing; ++ ++ if (list->count == 0) ++ return VKD3D_OK; ++ ++ trailing = &list->structures[list->count - 1]; ++ ++ if (trailing->type != STRUCTURE_TYPE_SELECTION) ++ return VKD3D_OK; ++ ++ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body); ++ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body); ++ ++ return vsir_cfg_move_breaks_out_of_selections(cfg, list); ++} ++ ++static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list) ++{ ++ enum vkd3d_result ret; ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; ++ ++ if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) ++ continue; ++ ++ vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); ++ new_selection.u.selection.condition = structure->u.jump.condition; ++ new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; ++ ++ if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, ++ STRUCTURE_TYPE_JUMP))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ new_jump->u.jump.type = structure->u.jump.type; ++ new_jump->u.jump.target = structure->u.jump.target; ++ ++ /* Move the rest of the structure list in the else branch ++ * rather than leaving it after the selection construct. The ++ * reason is that this is more conducive to further ++ * optimization, because all the conditional `break's appear ++ * as the last instruction of a branch of a cascade of ++ * selection constructs at the end of the structure list we're ++ * processing, instead of being buried in the middle of the ++ * structure list itself. */ ++ if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, ++ &list->structures[i + 1], list->count - i - 1)) < 0) ++ return ret; + + *structure = new_selection; + list->count = i + 1; + +- if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0) ++ if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0) ++ return ret; ++ ++ if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0) + return ret; + + break; +@@ -4305,40 +4340,164 @@ static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structur + return VKD3D_OK; + } + ++static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop) ++{ ++ struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body; ++ unsigned int target, loop_idx = loop->u.loop.idx; ++ struct vsir_cfg_structure *trailing_break; ++ enum vkd3d_result ret; ++ ++ trailing_break = vsir_cfg_get_trailing_break(loop_body); ++ ++ /* If the loop's last instruction is not a break, we cannot remove ++ * the loop itself. */ ++ if (!trailing_break) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ return ret; ++ memset(loop, 0, sizeof(*loop)); ++ return VKD3D_OK; ++ } ++ ++ target = trailing_break->u.jump.target; ++ assert(cfg->loop_intervals[target].target_count > 0); ++ ++ /* If the loop is not targeted by any jump, we can remove it. The ++ * trailing `break' then targets another loop, so we have to keep ++ * it. */ ++ if (cfg->loop_intervals[loop_idx].target_count == 0) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, ++ &loop_body->structures[0], loop_body->count)) < 0) ++ return ret; ++ loop_body->count = 0; ++ return VKD3D_OK; ++ } ++ ++ /* If the loop is targeted only by its own trailing `break' ++ * instruction, then we can remove it together with the `break' ++ * itself. */ ++ if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1) ++ { ++ --cfg->loop_intervals[loop_idx].target_count; ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, ++ &loop_body->structures[0], loop_body->count - 1)) < 0) ++ return ret; ++ loop_body->count = 0; ++ return VKD3D_OK; ++ } ++ ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ return ret; ++ memset(loop, 0, sizeof(*loop)); ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) + { ++ struct vsir_cfg_structure_list old_list = *list, *new_list = list; + enum vkd3d_result ret; + size_t i; + +- for (i = 0; i < list->count; ++i) ++ memset(new_list, 0, sizeof(*new_list)); ++ ++ for (i = 0; i < old_list.count; ++i) + { +- struct vsir_cfg_structure *loop = &list->structures[i]; ++ struct vsir_cfg_structure *loop = &old_list.structures[i], *selection; + struct vsir_cfg_structure_list *loop_body; + + if (loop->type != STRUCTURE_TYPE_LOOP) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ goto out; ++ memset(loop, 0, sizeof(*loop)); + continue; ++ } + + loop_body = &loop->u.loop.body; + + if (loop_body->count == 0) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ goto out; ++ memset(loop, 0, sizeof(*loop)); + continue; ++ } + +- vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx); ++ vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx); + + if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) +- return ret; ++ goto out; + +- if ((ret = vsir_cfg_synthesize_selections(loop_body)) < 0) +- return ret; ++ if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0) ++ goto out; ++ ++ /* If the last pushed instruction is a selection and one of the branches terminates with a ++ * `break', start pushing to the other branch, in the hope of eventually push a `break' ++ * there too and be able to remove a loop. */ ++ if (new_list->count == 0) ++ continue; ++ ++ selection = &new_list->structures[new_list->count - 1]; ++ ++ if (selection->type == STRUCTURE_TYPE_SELECTION) ++ { ++ if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body)) ++ new_list = &selection->u.selection.else_body; ++ else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body)) ++ new_list = &selection->u.selection.if_body; ++ } + } + +- return VKD3D_OK; ++ ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list); ++ ++out: ++ vsir_cfg_structure_list_cleanup(&old_list); ++ ++ return ret; ++} ++ ++static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) ++{ ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i]; ++ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ vsir_cfg_count_targets(cfg, &structure->u.loop.body); ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ vsir_cfg_count_targets(cfg, &structure->u.selection.if_body); ++ vsir_cfg_count_targets(cfg, &structure->u.selection.else_body); ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE) ++ ++cfg->loop_intervals[structure->u.jump.target].target_count; ++ break; ++ } ++ } + } + + static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) + { + enum vkd3d_result ret; + ++ vsir_cfg_count_targets(cfg, &cfg->structured_program); ++ + ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); + + if (TRACE_ON()) +@@ -4348,199 +4507,244 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) + } + + static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, +- struct vsir_cfg_structure_list *list, unsigned int loop_idx) ++ struct vsir_cfg_structure_list *list, unsigned int loop_idx); ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg, ++ struct vsir_block *block) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, ++ target->ins_count + (block->end - block->begin))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ memcpy(&target->instructions[target->ins_count], block->begin, ++ (char *)block->end - (char *)block->begin); ++ ++ target->ins_count += block->end - block->begin; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_loop *loop, unsigned int loop_idx) + { ++ struct vsir_cfg_emit_target *target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; +- size_t i; + +- for (i = 0; i < list->count; ++i) ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP); ++ ++ if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) ++ return ret; ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); ++ ++ /* Add a trampoline to implement multilevel jumping depending on the stored ++ * jump_target value. */ ++ if (loop_idx != UINT_MAX) + { +- struct vsir_cfg_structure *structure = &list->structures[i]; ++ /* If the multilevel jump is a `continue' and the target is the loop we're inside ++ * right now, then we can finally do the `continue'. */ ++ const unsigned int outer_continue_target = loop_idx << 1 | 1; ++ /* If the multilevel jump is a `continue' to any other target, or if it is a `break' ++ * and the target is not the loop we just finished emitting, then it means that ++ * we have to reach an outer loop, so we keep breaking. */ ++ const unsigned int inner_break_target = loop->idx << 1; ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_IEQ, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- switch (structure->type) +- { +- case STRUCTURE_TYPE_BLOCK: +- { +- struct vsir_block *block = structure->u.block; ++ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); ++ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); ++ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); + +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++target->ins_count; + +- memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- cfg->ins_count += block->end - block->begin; +- break; +- } ++ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); + +- case STRUCTURE_TYPE_LOOP: +- { +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++target->ins_count; ++ ++target->temp_count; + +- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_IEQ, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) +- return ret; ++ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); ++ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); ++ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); + +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++target->ins_count; + +- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_BREAKP, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + +- /* Add a trampoline to implement multilevel jumping depending on the stored +- * jump_target value. */ +- if (loop_idx != UINT_MAX) +- { +- /* If the multilevel jump is a `continue' and the target is the loop we're inside +- * right now, then we can finally do the `continue'. */ +- const unsigned int outer_continue_target = loop_idx << 1 | 1; +- /* If the multilevel jump is a `continue' to any other target, or if it is a `break' +- * and the target is not the loop we just finished emitting, then it means that +- * we have to reach an outer loop, so we keep breaking. */ +- const unsigned int inner_break_target = structure->u.loop.idx << 1; ++ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); + +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], +- &no_loc, VKD3DSIH_IEQ, 1, 2)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++target->ins_count; ++ ++target->temp_count; ++ } + +- dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); +- src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); +- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); ++ return VKD3D_OK; ++} + +- ++cfg->ins_count; ++static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_selection *selection, unsigned int loop_idx) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ const struct vkd3d_shader_location no_loc = {0}; ++ enum vkd3d_result ret; + +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], +- &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_IF, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- ++cfg->ins_count; +- ++cfg->temp_count; ++ target->instructions[target->ins_count].src[0] = *selection->condition; + +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], +- &no_loc, VKD3DSIH_IEQ, 1, 2)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ if (selection->invert_condition) ++ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + +- dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); +- src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); +- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); ++ ++target->ins_count; + +- ++cfg->ins_count; ++ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) ++ return ret; + +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], +- &no_loc, VKD3DSIH_BREAKP, 0, 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; +- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; ++ if (selection->else_body.count != 0) ++ { ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE); + +- ++cfg->ins_count; +- ++cfg->temp_count; +- } ++ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) ++ return ret; ++ } + +- break; +- } ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- case STRUCTURE_TYPE_SELECTION: +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF); + +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, +- VKD3DSIH_IF, 0, 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ return VKD3D_OK; ++} + +- cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition; ++static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_jump *jump, unsigned int loop_idx) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ const struct vkd3d_shader_location no_loc = {0}; ++ /* Encode the jump target as the loop index plus a bit to remember whether ++ * we're breaking or continueing. */ ++ unsigned int jump_target = jump->target << 1; ++ enum vkd3d_shader_opcode opcode; + +- if (structure->u.selection.invert_condition) +- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; ++ switch (jump->type) ++ { ++ case JUMP_CONTINUE: ++ /* If we're continueing the loop we're directly inside, then we can emit a ++ * `continue'. Otherwise we first have to break all the loops between here ++ * and the loop to continue, recording our intention to continue ++ * in the lowest bit of jump_target. */ ++ if (jump->target == loop_idx) ++ { ++ opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; ++ break; ++ } ++ jump_target |= 1; ++ /* fall through */ + +- ++cfg->ins_count; ++ case JUMP_BREAK: ++ opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; ++ break; + +- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0) +- return ret; ++ case JUMP_RET: ++ assert(!jump->condition); ++ opcode = VKD3DSIH_RET; ++ break; + +- if (structure->u.selection.else_body.count != 0) +- { +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ default: ++ vkd3d_unreachable(); ++ } + +- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE); ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0) +- return ret; +- } ++ if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) ++ { ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_MOV, 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); ++ src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); + +- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF); +- break; ++ ++target->ins_count; ++ } + +- case STRUCTURE_TYPE_JUMP: +- { +- /* Encode the jump target as the loop index plus a bit to remember whether +- * we're breaking or continueing. */ +- unsigned int jump_target = structure->u.jump.target << 1; +- enum vkd3d_shader_opcode opcode; ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, opcode, 0, !!jump->condition)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- switch (structure->u.jump.type) +- { +- case JUMP_CONTINUE: +- /* If we're continueing the loop we're directly inside, then we can emit a +- * `continue'. Otherwise we first have to break all the loops between here +- * and the loop to continue, recording our intention to continue +- * in the lowest bit of jump_target. */ +- if (structure->u.jump.target == loop_idx) +- { +- opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; +- break; +- } +- jump_target |= 1; +- /* fall through */ +- +- case JUMP_BREAK: +- opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; +- break; +- +- case JUMP_RET: +- assert(!structure->u.jump.condition); +- opcode = VKD3DSIH_RET; +- break; +- +- default: +- vkd3d_unreachable(); +- } ++ if (jump->invert_condition) ++ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ if (jump->condition) ++ target->instructions[target->ins_count].src[0] = *jump->condition; + +- if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) +- { +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], +- &no_loc, VKD3DSIH_MOV, 1, 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++target->ins_count; + +- dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); +- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); ++ return VKD3D_OK; ++} + +- ++cfg->ins_count; +- } ++static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list, unsigned int loop_idx) ++{ ++ enum vkd3d_result ret; ++ size_t i; + +- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], +- &no_loc, opcode, 0, !!structure->u.jump.condition)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i]; + +- if (structure->u.jump.invert_condition) +- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0) ++ return ret; ++ break; + +- if (structure->u.jump.condition) +- cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; ++ case STRUCTURE_TYPE_LOOP: ++ if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0) ++ return ret; ++ break; + +- ++cfg->ins_count; ++ case STRUCTURE_TYPE_SELECTION: ++ if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection, ++ loop_idx)) < 0) ++ return ret; ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, ++ loop_idx)) < 0) ++ return ret; + break; +- } + + default: + vkd3d_unreachable(); +@@ -4551,40 +4755,191 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + } + + static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) ++{ ++ return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); ++} ++ ++static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) + { + enum vkd3d_result ret; +- size_t i; ++ struct vsir_cfg cfg; ++ ++ if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0) ++ return ret; + +- cfg->jump_target_temp_idx = cfg->program->temp_count; +- cfg->temp_count = cfg->program->temp_count + 1; ++ vsir_cfg_compute_dominators(&cfg); + +- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) ++ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_optimize(&cfg)) < 0) ++ goto out; ++ ++ ret = vsir_cfg_emit_structured_program(&cfg); ++ ++out: ++ vsir_cfg_cleanup(&cfg); ++ ++ return ret; ++} ++ ++static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct vsir_cfg_emit_target target = {0}; ++ enum vkd3d_result ret; ++ unsigned int i; ++ ++ target.jump_target_temp_idx = program->temp_count; ++ target.temp_count = program->temp_count + 1; ++ ++ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + /* Copy declarations until the first block. */ +- for (i = 0; i < cfg->program->instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + if (ins->handler_idx == VKD3DSIH_LABEL) + break; + +- cfg->instructions[cfg->ins_count++] = *ins; ++ target.instructions[target.ins_count++] = *ins; + } + +- if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) ++ if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0) + goto fail; + +- vkd3d_free(cfg->program->instructions.elements); +- cfg->program->instructions.elements = cfg->instructions; +- cfg->program->instructions.capacity = cfg->ins_capacity; +- cfg->program->instructions.count = cfg->ins_count; +- cfg->program->temp_count = cfg->temp_count; ++ vkd3d_free(program->instructions.elements); ++ program->instructions.elements = target.instructions; ++ program->instructions.capacity = target.ins_capacity; ++ program->instructions.count = target.ins_count; ++ program->temp_count = target.temp_count; + + return VKD3D_OK; + + fail: +- vkd3d_free(cfg->instructions); ++ vkd3d_free(target.instructions); ++ ++ return ret; ++} ++ ++static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, ++ struct vsir_block *block, struct vsir_block **origin_blocks) ++{ ++ unsigned int i; ++ ++ if (!register_is_ssa(reg)) ++ return; ++ ++ i = reg->idx[0].offset; ++ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) ++ alloc->table[i] = alloc->next_temp_idx++; ++ ++ for (i = 0; i < reg->idx_count; ++i) ++ if (reg->idx[i].rel_addr) ++ register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks); ++} ++ ++/* Drivers are not necessarily optimised to handle very large numbers of temps. For example, ++ * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. ++ * This can also result in the backend emitting less code because temps typically need an ++ * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all ++ * undominated SSA use, but structurisation may create new occurrences. */ ++static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) ++{ ++ struct vsir_program *program = cfg->program; ++ struct ssas_to_temps_alloc alloc = {0}; ++ struct vsir_block **origin_blocks; ++ unsigned int j; ++ size_t i; ++ ++ if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) ++ { ++ ERR("Failed to allocate origin block array.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) ++ { ++ vkd3d_free(origin_blocks); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ struct vkd3d_shader_instruction *ins; ++ ++ for (ins = block->begin; ins <= block->end; ++ins) ++ { ++ for (j = 0; j < ins->dst_count; ++j) ++ { ++ if (register_is_ssa(&ins->dst[j].reg)) ++ origin_blocks[ins->dst[j].reg.idx[0].offset] = block; ++ } ++ } ++ } ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ struct vkd3d_shader_instruction *ins; ++ ++ for (ins = block->begin; ins <= block->end; ++ins) ++ { ++ for (j = 0; j < ins->src_count; ++j) ++ register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); ++ } ++ } ++ ++ if (alloc.next_temp_idx == program->temp_count) ++ goto done; ++ ++ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); ++ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ for (j = 0; j < ins->dst_count; ++j) ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); ++ ++ for (j = 0; j < ins->src_count; ++j) ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); ++ } ++ ++ program->temp_count = alloc.next_temp_idx; ++done: ++ vkd3d_free(origin_blocks); ++ vkd3d_free(alloc.table); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ enum vkd3d_result ret; ++ struct vsir_cfg cfg; ++ ++ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0) ++ return ret; ++ ++ vsir_cfg_compute_dominators(&cfg); ++ ++ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); ++ ++ vsir_cfg_cleanup(&cfg); + + return ret; + } +@@ -5459,63 +5814,25 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t + { + enum vkd3d_result result = VKD3D_OK; + +- remove_dcl_temps(program); +- +- if ((result = vsir_program_lower_texkills(program)) < 0) ++ if ((result = vsir_program_lower_instructions(program)) < 0) + return result; + + if (program->shader_version.major >= 6) + { +- struct vsir_cfg cfg; +- +- if ((result = lower_switch_to_if_ladder(program)) < 0) +- return result; +- +- if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) +- return result; +- +- if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) ++ if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) + return result; + +- vsir_cfg_compute_dominators(&cfg); +- +- if ((result = vsir_cfg_compute_loops(&cfg)) < 0) +- { +- vsir_cfg_cleanup(&cfg); ++ if ((result = lower_switch_to_if_ladder(program)) < 0) + return result; +- } + +- if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) +- { +- vsir_cfg_cleanup(&cfg); ++ if ((result = vsir_program_structurize(program, message_context)) < 0) + return result; +- } + +- if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) +- { +- vsir_cfg_cleanup(&cfg); ++ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; +- } + +- if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) +- { +- vsir_cfg_cleanup(&cfg); ++ if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) + return result; +- } +- +- if ((result = vsir_cfg_optimize(&cfg)) < 0) +- { +- vsir_cfg_cleanup(&cfg); +- return result; +- } +- +- if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) +- { +- vsir_cfg_cleanup(&cfg); +- return result; +- } +- +- vsir_cfg_cleanup(&cfg); + } + else + { +@@ -5545,10 +5862,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t + + if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) + return result; +- } + +- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) +- return result; ++ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) ++ return result; ++ } + + if (TRACE_ON()) + vkd3d_shader_trace(program); +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 1cae2d7d9d4..dfab1cb229b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -4419,11 +4419,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp + { + unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- uint32_t type_id, val_id; ++ uint32_t type_id, dst_type_id, val_id; + ++ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if (component_count > 1) + { +- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, component_ids, component_count); + } +@@ -4431,6 +4431,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp + { + val_id = *component_ids; + } ++ ++ dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); ++ if (dst_type_id != type_id) ++ val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id); ++ + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +@@ -7283,8 +7288,12 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + } + + general_implementation: +- write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) +- ? vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask; ++ write_mask = dst->write_mask; ++ if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) ++ write_mask = vsir_write_mask_64_from_32(write_mask); ++ else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) ++ write_mask = vsir_write_mask_32_from_64(write_mask); ++ + val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); + if (dst->reg.data_type != src->reg.data_type) + { +@@ -8895,8 +8904,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t base_coordinate_id, component_idx; +- const struct vkd3d_shader_src_param *data; + struct vkd3d_shader_register_info reg_info; ++ struct vkd3d_shader_src_param data; + unsigned int component_count; + + if (!spirv_compiler_get_register_info(compiler, &dst->reg, ®_info)) +@@ -8908,8 +8917,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +- data = &src[instruction->src_count - 1]; +- val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); ++ data = src[instruction->src_count - 1]; ++ data.reg.data_type = VKD3D_DATA_UINT; ++ val_id = spirv_compiler_emit_load_src(compiler, &data, dst->write_mask); + + component_count = vsir_write_mask_component_count(dst->write_mask); + for (component_idx = 0; component_idx < component_count; ++component_idx) +@@ -9334,6 +9344,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, lod_id, val_id, miplevel_count_id; ++ enum vkd3d_shader_component_type component_type; + uint32_t constituents[VKD3D_VEC4_SIZE]; + unsigned int i, size_component_count; + struct vkd3d_shader_image image; +@@ -9370,10 +9381,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, constituents, i + 2); + ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (instruction->flags == VKD3DSI_RESINFO_UINT) + { +- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); ++ /* SSA registers must match the specified result type. */ ++ if (!register_is_ssa(&dst->reg)) ++ val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); ++ else ++ component_type = VKD3D_SHADER_COMPONENT_UINT; + } + else + { +@@ -9382,7 +9399,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, + val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); + } + val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, +- VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); ++ component_type, src[1].swizzle, dst->write_mask); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 4d0658313d5..d5019a5dd63 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -3385,10 +3385,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + if (profile->major_version >= 5) + { +- put_u32(&buffer, TAG_RD11); ++ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ +- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ ++ put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ +@@ -3405,6 +3405,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); ++ + if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + +@@ -3437,6 +3440,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + if (!cbuffer->reg.allocated) + continue; + ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); ++ + if (cbuffer->reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + +@@ -5343,7 +5349,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + &expr->node, arg1, arg2); + break; + +- case HLSL_OP3_MOVC: ++ case HLSL_OP3_TERNARY: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); + break; + +@@ -5399,7 +5405,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju + + case HLSL_IR_JUMP_DISCARD_NZ: + { +- instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; ++ instr.opcode = VKD3D_SM4_OP_DISCARD; ++ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); + instr.src_count = 1; +@@ -5700,19 +5707,13 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + { + if (instr->data_type) + { +- if (instr->data_type->class == HLSL_CLASS_MATRIX) +- { +- hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); +- break; +- } +- else if (instr->data_type->class == HLSL_CLASS_OBJECT) ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { +- hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", ++ instr->data_type->class); + break; + } + +- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); +- + if (!instr->reg.allocated) + { + assert(instr->type == HLSL_IR_CONSTANT); +@@ -5808,13 +5809,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) ++ { ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); ++ + write_sm4_dcl_constant_buffer(&tpf, cbuffer); ++ } + } + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); ++ + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 6d442cd517d..9b37bbef70b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ + return compacted_swizzle; + } + ++static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) ++{ ++ static const unsigned int swizzles[16] = ++ { ++ 0, ++ VKD3D_SHADER_SWIZZLE(X, X, X, X), ++ VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), ++ VKD3D_SHADER_SWIZZLE(X, Y, X, X), ++ VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), ++ VKD3D_SHADER_SWIZZLE(X, Z, X, X), ++ VKD3D_SHADER_SWIZZLE(Y, Z, X, X), ++ VKD3D_SHADER_SWIZZLE(X, Y, Z, X), ++ VKD3D_SHADER_SWIZZLE(W, W, W, W), ++ VKD3D_SHADER_SWIZZLE(X, W, X, X), ++ VKD3D_SHADER_SWIZZLE(Y, W, X, X), ++ VKD3D_SHADER_SWIZZLE(X, Y, W, X), ++ VKD3D_SHADER_SWIZZLE(Z, W, X, X), ++ VKD3D_SHADER_SWIZZLE(X, Z, W, X), ++ VKD3D_SHADER_SWIZZLE(Y, Z, W, X), ++ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), ++ }; ++ ++ return swizzles[writemask & 0xf]; ++} ++ + struct vkd3d_struct + { + enum vkd3d_shader_structure_type type; +diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c +new file mode 100644 +index 00000000000..56ba6990420 +--- /dev/null ++++ b/libs/vkd3d/libs/vkd3d/cache.c +@@ -0,0 +1,59 @@ ++/* ++ * Copyright 2024 Stefan Dösinger for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#include "vkd3d_private.h" ++ ++struct vkd3d_shader_cache ++{ ++ unsigned int refcount; ++}; ++ ++int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) ++{ ++ struct vkd3d_shader_cache *object; ++ ++ TRACE("%p.\n", cache); ++ ++ object = vkd3d_malloc(sizeof(*object)); ++ if (!object) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ object->refcount = 1; ++ *cache = object; ++ ++ return VKD3D_OK; ++} ++ ++unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) ++{ ++ unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount); ++ TRACE("cache %p refcount %u.\n", cache, refcount); ++ return refcount; ++} ++ ++unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) ++{ ++ unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); ++ TRACE("cache %p refcount %u.\n", cache, refcount); ++ ++ if (refcount) ++ return refcount; ++ ++ vkd3d_free(cache); ++ return 0; ++} +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 65db8b70bfd..90de27c53b6 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -2529,11 +2529,17 @@ struct d3d12_cache_session + ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; + unsigned int refcount; + ++ struct list cache_list_entry; ++ + struct d3d12_device *device; + struct vkd3d_private_store private_store; + D3D12_SHADER_CACHE_SESSION_DESC desc; ++ struct vkd3d_shader_cache *cache; + }; + ++static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER; ++static struct list cache_list = LIST_INIT(cache_list); ++ + static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) + { + return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface); +@@ -2582,6 +2588,11 @@ static void d3d12_cache_session_destroy(struct d3d12_cache_session *session) + + TRACE("Destroying cache session %p.\n", session); + ++ vkd3d_mutex_lock(&cache_list_mutex); ++ list_remove(&session->cache_list_entry); ++ vkd3d_mutex_unlock(&cache_list_mutex); ++ ++ vkd3d_shader_cache_decref(session->cache); + vkd3d_private_store_destroy(&session->private_store); + vkd3d_free(session); + +@@ -2707,11 +2718,14 @@ static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = + static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, + struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) + { ++ struct d3d12_cache_session *i; ++ enum vkd3d_result ret; + HRESULT hr; + + session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; + session->refcount = 1; + session->desc = *desc; ++ session->cache = NULL; + + if (!session->desc.MaximumValueFileSizeBytes) + session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; +@@ -2723,9 +2737,56 @@ static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, + if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) + return hr; + ++ vkd3d_mutex_lock(&cache_list_mutex); ++ ++ /* We expect the number of open caches to be small. */ ++ LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry) ++ { ++ if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier))) ++ { ++ TRACE("Found an existing cache %p from session %p.\n", i->cache, i); ++ if (desc->Version == i->desc.Version) ++ { ++ session->desc = i->desc; ++ vkd3d_shader_cache_incref(session->cache = i->cache); ++ break; ++ } ++ else ++ { ++ WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n", ++ i->desc.Version, desc->Version); ++ hr = DXGI_ERROR_ALREADY_EXISTS; ++ goto error; ++ } ++ } ++ } ++ ++ if (!session->cache) ++ { ++ if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK) ++ FIXME("Disk caches are not yet implemented.\n"); ++ ++ ret = vkd3d_shader_open_cache(&session->cache); ++ if (ret) ++ { ++ WARN("Failed to open shader cache.\n"); ++ hr = hresult_from_vkd3d_result(ret); ++ goto error; ++ } ++ } ++ ++ /* Add it to the list even if we reused an existing cache. The other session might be destroyed, ++ * but the cache stays alive and can be opened a third time. */ ++ list_add_tail(&cache_list, &session->cache_list_entry); + d3d12_device_add_ref(session->device = device); + ++ vkd3d_mutex_unlock(&cache_list_mutex); + return S_OK; ++ ++error: ++ vkd3d_private_store_destroy(&session->private_store); ++ vkd3d_mutex_unlock(&cache_list_mutex); ++ return hr; + } + + /* ID3D12Device */ +@@ -4874,6 +4935,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Dev + WARN("No output pointer, returning S_FALSE.\n"); + return S_FALSE; + } ++ *session = NULL; + + if (!(object = vkd3d_malloc(sizeof(*object)))) + return E_OUTOFMEMORY; +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index b83a45d0606..179999148bc 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -1893,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 + WARN("Invalid sample count 0.\n"); + return E_INVALIDARG; + } ++ if (desc->SampleDesc.Count > 1 ++ && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) ++ { ++ WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n", ++ desc->SampleDesc.Count); ++ return E_INVALIDARG; ++ } + + if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + { +@@ -1996,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + WARN("Invalid initial resource state %#x.\n", initial_state); + return E_INVALIDARG; + } ++ if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) ++ { ++ WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state); ++ return E_INVALIDARG; ++ } + + if (optimized_clear_value && d3d12_resource_is_buffer(resource)) + { +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 08cc110e8f7..b8328216a29 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState + + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); ++ + vkd3d_free(state); + + d3d12_device_release(device); +@@ -2413,8 +2416,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_descriptor_offset_info offset_info; +- const struct d3d12_root_signature *root_signature; + struct vkd3d_shader_spirv_target_info target_info; ++ struct d3d12_root_signature *root_signature; + VkPipelineLayout vk_pipeline_layout; + HRESULT hr; + +@@ -2425,13 +2428,27 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + + if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) + { +- WARN("Root signature is NULL.\n"); +- return E_INVALIDARG; ++ TRACE("Root signature is NULL, looking for an embedded signature.\n"); ++ if (FAILED(hr = d3d12_root_signature_create(device, ++ desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature))) ++ { ++ WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr)); ++ return hr; ++ } ++ state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface; ++ } ++ else ++ { ++ state->implicit_root_signature = NULL; + } + + if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, + &desc->cs, VK_SHADER_STAGE_COMPUTE_BIT))) ++ { ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); + return hr; ++ } + + memset(&target_info, 0, sizeof(target_info)); + target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; +@@ -2476,6 +2493,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + { + WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr)); + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); + return hr; + } + +@@ -2483,6 +2502,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + { + VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); + return hr; + } + +@@ -3484,6 +3505,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s + goto fail; + + state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; ++ state->implicit_root_signature = NULL; + d3d12_device_add_ref(state->device = device); + + return S_OK; +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 14c8eb54574..9f5f759667a 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -1213,6 +1213,7 @@ struct d3d12_pipeline_state + + struct d3d12_pipeline_uav_counter_state uav_counters; + ++ ID3D12RootSignature *implicit_root_signature; + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +@@ -1927,4 +1928,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) + vkd3d_header->next = vkd3d_structure; + } + ++struct vkd3d_shader_cache; ++ ++int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); ++unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); ++unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); ++ + #endif /* __VKD3D_PRIVATE_H */ +-- +2.43.0 +