From 36020b4a0e5f6087a24920e8fa5b79c22b6c9a02 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 10 Apr 2025 08:23:14 +1000 Subject: [PATCH] Updated vkd3d-latest patchset Squash and update. --- ...f576ecc9929dd98c900bb8bc0335b91a1a0.patch} | 8411 +++++++++++++++-- ...-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch | 3494 ------- ...-cbce3a8631116ec10895e6c9c4a00b89b05.patch | 214 + ...-e418bbcfac74230dfbf0c49b72c4f059cb1.patch | 831 -- ...-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch | 1534 --- ...-f576ecc9929dd98c900bb8bc0335b91a1a0.patch | 1779 ---- 6 files changed, 7963 insertions(+), 8300 deletions(-) rename patches/vkd3d-latest/{0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch => 0001-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch} (67%) delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch create mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-cbce3a8631116ec10895e6c9c4a00b89b05.patch delete mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch delete mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch delete mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch similarity index 67% rename from patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch rename to patches/vkd3d-latest/0001-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch index 4c56b616..c98bcdf8 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch @@ -1,7 +1,8 @@ -From a616817217f5d5276a38ec1aac6ffc26506a0740 Mon Sep 17 00:00:00 2001 +From 8e84e1a5750fac027178bb160050030e2786680b Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 21 Feb 2025 09:15:01 +1100 -Subject: [PATCH] Updated vkd3d to 81dc67b1ef411ea3cd7c09e6e935765ac1583d28. +Subject: [PATCH 1/2] Updated vkd3d to + f576ecc9929dd98c900bb8bc0335b91a1a0d3bff. --- libs/vkd3d/include/private/spirv_grammar.h | 10103 ++++++++++++++++ @@ -10,29 +11,33 @@ Subject: [PATCH] Updated vkd3d to 81dc67b1ef411ea3cd7c09e6e935765ac1583d28. libs/vkd3d/include/private/vkd3d_version.h | 2 +- libs/vkd3d/include/vkd3d_shader.h | 15 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 23 +- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 24 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 32 +- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 34 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 95 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 1467 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 263 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 69 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 578 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1287 +- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 71 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 222 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 99 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 1862 ++- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 27 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 377 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 104 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 5 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1309 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 3568 +++--- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 81 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 433 +- + libs/vkd3d/libs/vkd3d-shader/msl.c | 15 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + libs/vkd3d/libs/vkd3d-shader/preproc.y | 10 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 427 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 25 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 81 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 6 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 464 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 128 +- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 198 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 71 +- .../vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c | 2 + libs/vkd3d/libs/vkd3d/command.c | 50 +- libs/vkd3d/libs/vkd3d/device.c | 37 +- libs/vkd3d/libs/vkd3d/resource.c | 14 +- libs/vkd3d/libs/vkd3d/state.c | 118 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 2 +- - 28 files changed, 13284 insertions(+), 1728 deletions(-) + 32 files changed, 16168 insertions(+), 3027 deletions(-) create mode 100644 libs/vkd3d/include/private/spirv_grammar.h diff --git a/libs/vkd3d/include/private/spirv_grammar.h b/libs/vkd3d/include/private/spirv_grammar.h @@ -10192,12 +10197,12 @@ index c9f8001e590..00052a89988 100644 } diff --git a/libs/vkd3d/include/private/vkd3d_version.h b/libs/vkd3d/include/private/vkd3d_version.h -index 0edc4428022..a73ae3fb03c 100644 +index 0edc4428022..795bc2dc490 100644 --- a/libs/vkd3d/include/private/vkd3d_version.h +++ b/libs/vkd3d/include/private/vkd3d_version.h @@ -1 +1 @@ -#define VKD3D_VCS_ID " (Wine bundled)" -+#define VKD3D_VCS_ID " (git 81dc67b1)" ++#define VKD3D_VCS_ID " (git a4f58be0)" diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 058166aa2f9..2e1f37f12e6 100644 --- a/libs/vkd3d/include/vkd3d_shader.h @@ -10251,7 +10256,7 @@ index f60ef7db769..c2c6ad67804 100644 #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 0639da83aa6..7b69535a445 100644 +index 0639da83aa6..764f0888490 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -2069,15 +2069,22 @@ static const char *get_component_type_name(enum vkd3d_shader_component_type type @@ -10285,6 +10290,111 @@ index 0639da83aa6..7b69535a445 100644 } static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) +@@ -2097,6 +2104,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic + { + switch (semantic) + { ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: return "primID"; + case VKD3D_SHADER_SV_DEPTH: return "oDepth"; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 58e35cf22e8..b49ef9865db 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1759,27 +1759,40 @@ static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) + + static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) + { ++ uint32_t offset = reg->reg.idx_count ? reg->reg.idx[0].offset : 0; ++ + VKD3D_ASSERT(reg->write_mask); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(®->reg) + | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) + | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) +- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); ++ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); + } + + static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) + { ++ uint32_t address_mode = VKD3D_SM1_ADDRESS_MODE_ABSOLUTE, offset = 0; ++ ++ if (reg->reg.idx_count) ++ { ++ offset = reg->reg.idx[0].offset; ++ if (reg->reg.idx[0].rel_addr) ++ address_mode = VKD3D_SM1_ADDRESS_MODE_RELATIVE; ++ } ++ + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(®->reg) ++ | (address_mode << VKD3D_SM1_ADDRESS_MODE_SHIFT) + | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) + | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) +- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); ++ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); + } + + static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) + { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; ++ const struct vkd3d_shader_src_param *src; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i; + uint32_t token; +@@ -1810,13 +1823,10 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v + + for (i = 0; i < ins->src_count; ++i) + { +- if (ins->src[i].reg.idx[0].rel_addr) +- { +- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, +- "Unhandled relative addressing on source register."); +- d3dbc->failed = true; +- } +- write_sm1_src_register(buffer, &ins->src[i]); ++ src = &ins->src[i]; ++ write_sm1_src_register(buffer, src); ++ if (src->reg.idx_count && src->reg.idx[0].rel_addr) ++ write_sm1_src_register(buffer, src->reg.idx[0].rel_addr); + } + }; + +@@ -1831,6 +1841,7 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 + .reg.type = VKD3DSPR_CONST, + .write_mask = VKD3DSP_WRITEMASK_ALL, + .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, ++ .reg.idx_count = 1, + }; + + token = VKD3D_SM1_OP_DEF; +@@ -1863,6 +1874,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, + reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.write_mask = VKD3DSP_WRITEMASK_ALL; + reg.reg.idx[0].offset = reg_id; ++ reg.reg.idx_count = 1; + + write_sm1_dst_register(buffer, ®); + } +@@ -1938,6 +1950,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_MOV: ++ case VKD3DSIH_MOVA: + case VKD3DSIH_MUL: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SLT: +@@ -1982,6 +1995,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + uint32_t token, usage_idx; + bool ret; + ++ reg.reg.idx_count = 1; + if (sm1_register_from_semantic_name(version, element->semantic_name, + element->semantic_index, output, ®.reg.type, ®.reg.idx[0].offset)) + { diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 81af62f7810..9e3a57132a1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -10371,7 +10481,7 @@ index 81af62f7810..9e3a57132a1 100644 { FIXME("Multiple patch constant signatures.\n"); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index a10de68008a..29e714ee2ac 100644 +index a10de68008a..ac4828d6f59 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3911,23 +3911,51 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade @@ -10538,7 +10648,39 @@ index a10de68008a..29e714ee2ac 100644 return VKD3D_OK; } -@@ -10432,9 +10478,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -9917,6 +9963,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s + { + input_primitive = VKD3D_PT_PATCH; + patch_vertex_count = i - INPUT_PRIMITIVE_PATCH1 + 1; ++ input_control_point_count = patch_vertex_count; + break; + } + +@@ -9927,6 +9974,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s + } + + sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); ++ sm6->p.program->input_primitive = input_primitive; + sm6->p.program->input_control_point_count = input_control_point_count; + + i = operands[1]; +@@ -9938,6 +9986,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s + "Geometry shader output vertex count %u is invalid.", i); + } + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_VERTICES_OUT, i); ++ sm6->p.program->vertices_out_count = i; + + if (operands[2] > 1) + { +@@ -9955,6 +10004,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s + output_primitive = VKD3D_PT_TRIANGLELIST; + } + sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); ++ sm6->p.program->output_topology = output_primitive; + + i = operands[4]; + if (!i || i > MAX_GS_INSTANCE_COUNT) +@@ -10432,9 +10482,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro input_signature = &program->input_signature; output_signature = &program->output_signature; patch_constant_signature = &program->patch_constant_signature; @@ -10548,7 +10690,7 @@ index a10de68008a..29e714ee2ac 100644 program->features = dxbc_desc->features; memset(dxbc_desc, 0, sizeof(*dxbc_desc)); -@@ -10498,18 +10541,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -10498,18 +10545,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro goto fail; } @@ -10567,7 +10709,7 @@ index a10de68008a..29e714ee2ac 100644 function_count = dxil_block_compute_function_count(&sm6->root_block); if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) { -@@ -10669,8 +10700,6 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co +@@ -10669,8 +10704,6 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co uint32_t *byte_code = NULL; int ret; @@ -10577,13 +10719,21 @@ index a10de68008a..29e714ee2ac 100644 if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, &dxbc_desc)) < 0) diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index bd7e7b420db..4fbce393707 100644 +index bd7e7b420db..debcb261811 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -25,6 +25,41 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin +@@ -25,6 +25,49 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); } ++enum fx_2_type_constants ++{ ++ /* Assignment types */ ++ FX_2_ASSIGNMENT_CODE_BLOB = 0x0, ++ FX_2_ASSIGNMENT_PARAMETER = 0x1, ++ FX_2_ASSIGNMENT_ARRAY_SELECTOR = 0x2, ++}; ++ +enum state_property_component_type +{ + FX_BOOL, @@ -10622,7 +10772,23 @@ index bd7e7b420db..4fbce393707 100644 struct fx_4_binary_type { uint32_t name; -@@ -461,6 +496,407 @@ static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offse +@@ -246,6 +289,15 @@ static void set_status(struct fx_write_context *fx, int status) + fx->status = status; + } + ++static void fx_print_string(struct vkd3d_string_buffer *buffer, const char *prefix, ++ const char *s, size_t len) ++{ ++ if (len) ++ --len; /* Trim terminating null. */ ++ vkd3d_string_buffer_printf(buffer, "%s", prefix); ++ vkd3d_string_buffer_print_string_escaped(buffer, s, len); ++} ++ + static uint32_t write_string(const char *string, struct fx_write_context *fx) + { + return fx->ops->write_string(string, fx); +@@ -461,6 +513,461 @@ static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offse set_u32(buffer, count_offset, count); } @@ -10855,6 +11021,28 @@ index bd7e7b420db..4fbce393707 100644 + { NULL } +}; + ++static const struct rhs_named_value fx_2_address_values[] = ++{ ++ { "WRAP", 1 }, ++ { "MIRROR", 2 }, ++ { "CLAMP", 3 }, ++ { "BORDER", 4 }, ++ { "MIRROR_ONCE", 5 }, ++ { NULL } ++}; ++ ++static const struct rhs_named_value fx_2_filter_values[] = ++{ ++ { "NONE", 0 }, ++ { "POINT", 1 }, ++ { "LINEAR", 2 }, ++ { "ANISOTROPIC", 3 }, ++ { "PYRAMIDALQUAD", 6 }, ++ { "GAUSSIANQUAD", 7 }, ++ { "CONVOLUTIONMONO", 8 }, ++ { NULL } ++}; ++ +static const struct fx_2_state +{ + const char *name; @@ -11025,12 +11213,44 @@ index bd7e7b420db..4fbce393707 100644 + + { "VertexShader", HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 }, + { "PixelShader", HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 147 }, ++ ++ { "VertexShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 148 }, ++ { "VertexShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u-1, 149 }, ++ { "VertexShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u-1, 150 }, ++ { "VertexShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 151 }, ++ { "VertexShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 152 }, ++ { "VertexShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 153 }, ++ { "VertexShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 154 }, ++ { "VertexShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 155 }, ++ ++ { "PixelShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 156 }, ++ { "PixelShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u-1, 157 }, ++ { "PixelShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u-1, 158 }, ++ { "PixelShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 159 }, ++ { "PixelShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 160 }, ++ { "PixelShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 161 }, ++ { "PixelShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 162 }, ++ { "PixelShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 163 }, ++ ++ { "Texture", HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 }, ++ { "AddressU", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 165, fx_2_address_values }, ++ { "AddressV", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 166, fx_2_address_values }, ++ { "AddressW", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 167, fx_2_address_values }, ++ { "BorderColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 168 }, ++ { "MagFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 169, fx_2_filter_values }, ++ { "MinFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 170, fx_2_filter_values }, ++ { "MipFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 171, fx_2_filter_values }, ++ { "MipMapLodBias", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 172 }, ++ { "MaxMipLevel", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 173 }, ++ { "MaxAnisotropy", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 174 }, ++ { "SRBTexture", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 175 }, ++ { "ElementIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 176 }, +}; + static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; -@@ -560,8 +996,21 @@ enum fx_4_type_constants +@@ -560,8 +1067,21 @@ enum fx_4_type_constants FX_4_ASSIGNMENT_VARIABLE = 0x2, FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, @@ -11052,7 +11272,21 @@ index bd7e7b420db..4fbce393707 100644 }; static const uint32_t fx_4_numeric_base_types[] = -@@ -1594,12 +2043,6 @@ static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_conte +@@ -1210,7 +1730,13 @@ static uint32_t write_fx_2_object_initializer(const struct hlsl_ir_var *var, str + put_u32(buffer, id); + put_u32(buffer, size); + if (size) ++ { ++ static const uint32_t pad; ++ + bytecode_put_bytes(buffer, data, size); ++ if (size % 4) ++ bytecode_put_bytes_unaligned(buffer, &pad, 4 - (size % 4)); ++ } + } + } + +@@ -1594,12 +2120,6 @@ static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_conte } } @@ -11065,7 +11299,7 @@ index bd7e7b420db..4fbce393707 100644 static bool get_fx_4_state_enum_value(const struct rhs_named_value *pairs, const char *name, unsigned int *value) { -@@ -1831,27 +2274,6 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no +@@ -1831,27 +2351,6 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no return true; } @@ -11093,7 +11327,25 @@ index bd7e7b420db..4fbce393707 100644 static inline bool is_object_fx_type(enum state_property_component_type type) { switch (type) -@@ -2395,7 +2817,15 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, +@@ -1867,6 +2366,7 @@ static inline bool is_object_fx_type(enum state_property_component_type type) + case FX_BLEND: + case FX_VERTEXSHADER: + case FX_PIXELSHADER: ++ case FX_GEOMETRYSHADER: + return true; + default: + return false; +@@ -2262,7 +2762,8 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + struct hlsl_ir_constant *c = hlsl_ir_constant(node); + struct hlsl_type *data_type = c->node.data_type; + +- if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) ++ if (data_type->class == HLSL_CLASS_SCALAR ++ && (data_type->e.numeric.type == HLSL_TYPE_INT || data_type->e.numeric.type == HLSL_TYPE_UINT)) + { + if (c->value.u[0].u != 0) + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +@@ -2395,7 +2896,15 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, unsigned int entry_index, struct fx_write_context *fx) { @@ -11110,7 +11362,7 @@ index bd7e7b420db..4fbce393707 100644 const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); struct hlsl_state_block_entry *entry = block->entries[entry_index]; static const unsigned int array_size = 8; -@@ -2914,6 +3344,11 @@ struct fx_parser +@@ -2914,6 +3423,11 @@ struct fx_parser uint32_t buffer_count; uint32_t object_count; uint32_t group_count; @@ -11122,7 +11374,7 @@ index bd7e7b420db..4fbce393707 100644 bool failed; }; -@@ -2965,13 +3400,6 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en +@@ -2965,13 +3479,6 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en parser->failed = true; } @@ -11136,84 +11388,114 @@ index bd7e7b420db..4fbce393707 100644 static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint32_t offset, size_t size) { const uint8_t *ptr = parser->unstructured.ptr; -@@ -2986,91 +3414,592 @@ static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint +@@ -2986,40 +3493,27 @@ static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint return &ptr[offset]; } -static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) -+static const void *fx_parser_get_ptr(struct fx_parser *parser, size_t size) -+{ -+ if (parser->end - parser->ptr < size) -+ { -+ parser->failed = true; -+ return NULL; -+ } -+ -+ return parser->ptr; -+} -+ -+static uint32_t fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) - { - const uint8_t *ptr; - - memset(dst, 0, size); - if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size))) +-{ +- const uint8_t *ptr; +- +- memset(dst, 0, size); +- if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size))) - return; -+ return offset; - - memcpy(dst, ptr, size); -+ return offset + size; - } - +- +- memcpy(dst, ptr, size); +-} +- -static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) -+static void parse_fx_start_indent(struct fx_parser *parser) ++static const void *fx_parser_get_ptr(struct fx_parser *parser, size_t size) { - const uint8_t *ptr = parser->unstructured.ptr; - const uint8_t *end = parser->unstructured.end; -+ ++parser->indent; -+} - +- - if (offset >= parser->unstructured.size) -- { -- parser->failed = true; ++ if (parser->end - parser->ptr < size) + { + parser->failed = true; - return ""; -- } -+static void parse_fx_end_indent(struct fx_parser *parser) -+{ -+ --parser->indent; -+} ++ return NULL; + } - ptr += offset; -+static void parse_fx_print_indent(struct fx_parser *parser) -+{ -+ vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); ++ return parser->ptr; +} - while (ptr < end && *ptr) - ++ptr; -+static const char *fx_2_get_string(struct fx_parser *parser, uint32_t offset) ++static uint32_t fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) +{ -+ const char *ptr; -+ uint32_t size; ++ const uint8_t *ptr; - if (*ptr) -+ fx_parser_read_unstructured(parser, &size, offset, sizeof(size)); -+ ptr = fx_parser_get_unstructured_ptr(parser, offset + 4, size); -+ -+ if (!ptr) - { - parser->failed = true; - return ""; - } +- { +- parser->failed = true; +- return ""; +- } ++ memset(dst, 0, size); ++ if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size))) ++ return offset; - return (const char *)(parser->unstructured.ptr + offset); ++ memcpy(dst, ptr, size); ++ return offset + size; + } + + static void parse_fx_start_indent(struct fx_parser *parser) +@@ -3037,46 +3531,787 @@ static void parse_fx_print_indent(struct fx_parser *parser) + vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); + } + +-static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, +- const struct fx_4_binary_type *type) ++static const char *fx_2_get_string(struct fx_parser *parser, uint32_t offset, uint32_t *size) + { +- unsigned int base_type, comp_count; +- size_t i; ++ const char *ptr; + +- base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; ++ fx_parser_read_unstructured(parser, size, offset, sizeof(*size)); ++ ptr = fx_parser_get_unstructured_ptr(parser, offset + 4, *size); + +- comp_count = type->packed_size / sizeof(uint32_t); +- for (i = 0; i < comp_count; ++i) ++ if (!ptr) + { +- union hlsl_constant_value_component value; +- +- fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); +- +- if (base_type == FX_4_NUMERIC_TYPE_FLOAT) +- vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); +- else if (base_type == FX_4_NUMERIC_TYPE_INT) +- vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); +- else if (base_type == FX_4_NUMERIC_TYPE_UINT) +- vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); +- else if (base_type == FX_4_NUMERIC_TYPE_BOOL) +- vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" ); +- else +- vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); +- +- if (i < comp_count - 1) +- vkd3d_string_buffer_printf(&parser->buffer, ", "); +- +- offset += sizeof(uint32_t); ++ parser->failed = true; ++ return ""; + } +-} + +-static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) +-{ +- const char *str = fx_4_get_string(parser, offset); +- vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); + return ptr; } --static void parse_fx_start_indent(struct fx_parser *parser) +-static void fx_parse_fx_4_annotations(struct fx_parser *parser) +static unsigned int fx_get_fx_2_type_size(struct fx_parser *parser, uint32_t *offset) - { -- ++parser->indent; ++{ + uint32_t element_count, member_count, class, columns, rows; + unsigned int size = 0; + @@ -11249,9 +11531,8 @@ index bd7e7b420db..4fbce393707 100644 + if (element_count) + size *= element_count; + return size; - } - --static void parse_fx_end_indent(struct fx_parser *parser) ++} ++ +static const char *const fx_2_types[] = +{ + [D3DXPT_VOID] = "void", @@ -11277,8 +11558,7 @@ index bd7e7b420db..4fbce393707 100644 +}; + +static void fx_parse_fx_2_type(struct fx_parser *parser, uint32_t offset) - { -- --parser->indent; ++{ + uint32_t type, class, rows, columns; + const char *name; + @@ -11303,13 +11583,11 @@ index bd7e7b420db..4fbce393707 100644 + fx_parser_read_unstructured(parser, &columns, offset + 24, sizeof(columns)); + vkd3d_string_buffer_printf(&parser->buffer, "%ux%u", rows, columns); + } - } - --static void parse_fx_print_indent(struct fx_parser *parser) ++} ++ +static void parse_fx_2_object_value(struct fx_parser *parser, uint32_t element_count, + uint32_t type, uint32_t offset) - { -- vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); ++{ + uint32_t id; + + element_count = max(element_count, 1); @@ -11328,53 +11606,38 @@ index bd7e7b420db..4fbce393707 100644 + } + + - } - --static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, -- const struct fx_4_binary_type *type) ++} ++ +static void parse_fx_2_numeric_value(struct fx_parser *parser, uint32_t offset, + unsigned int size, uint32_t base_type) - { -- unsigned int base_type, comp_count; -- size_t i; -- -- base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; ++{ + unsigned int i, comp_count; - -- comp_count = type->packed_size / sizeof(uint32_t); ++ + comp_count = size / sizeof(uint32_t); + if (comp_count > 1) + vkd3d_string_buffer_printf(&parser->buffer, "{"); - for (i = 0; i < comp_count; ++i) - { - union hlsl_constant_value_component value; - - fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); - -- if (base_type == FX_4_NUMERIC_TYPE_FLOAT) -- vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); -- else if (base_type == FX_4_NUMERIC_TYPE_INT) ++ for (i = 0; i < comp_count; ++i) ++ { ++ union hlsl_constant_value_component value; ++ ++ fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); ++ + if (base_type == D3DXPT_INT) - vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); -- else if (base_type == FX_4_NUMERIC_TYPE_UINT) -- vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -- else if (base_type == FX_4_NUMERIC_TYPE_BOOL) ++ vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); + else if (base_type == D3DXPT_BOOL) - vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" ); - else -- vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" ); ++ else + vkd3d_string_buffer_print_f32(&parser->buffer, value.f); - - if (i < comp_count - 1) - vkd3d_string_buffer_printf(&parser->buffer, ", "); - - offset += sizeof(uint32_t); - } ++ ++ if (i < comp_count - 1) ++ vkd3d_string_buffer_printf(&parser->buffer, ", "); ++ ++ offset += sizeof(uint32_t); ++ } + if (comp_count > 1) + vkd3d_string_buffer_printf(&parser->buffer, "}"); - } - --static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) ++} ++ +static void fx_parse_fx_2_parameter(struct fx_parser *parser, uint32_t offset) +{ + struct fx_2_var @@ -11386,13 +11649,14 @@ index bd7e7b420db..4fbce393707 100644 + uint32_t element_count; + } var; + const char *name; ++ uint32_t size; + + fx_parser_read_unstructured(parser, &var, offset, sizeof(var)); + + fx_parse_fx_2_type(parser, offset); + -+ name = fx_2_get_string(parser, var.name); -+ vkd3d_string_buffer_printf(&parser->buffer, " %s", name); ++ name = fx_2_get_string(parser, var.name, &size); ++ fx_print_string(&parser->buffer, " ", name, size); + if (var.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "[%u]", var.element_count); +} @@ -11406,6 +11670,40 @@ index bd7e7b420db..4fbce393707 100644 + || type == D3DXPT_SAMPLERCUBE; +} + ++static void fx_parse_fx_2_assignment(struct fx_parser *parser, const struct fx_assignment *entry); ++ ++static void parse_fx_2_sampler(struct fx_parser *parser, uint32_t element_count, ++ uint32_t offset) ++{ ++ struct fx_assignment entry; ++ uint32_t count; ++ ++ element_count = max(element_count, 1); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ for (uint32_t i = 0; i < element_count; ++i) ++ { ++ fx_parser_read_unstructured(parser, &count, offset, sizeof(count)); ++ offset += sizeof(count); ++ ++ parse_fx_start_indent(parser); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); ++ parse_fx_start_indent(parser); ++ for (uint32_t j = 0; j < count; ++j, offset += sizeof(entry)) ++ { ++ fx_parser_read_unstructured(parser, &entry, offset, sizeof(entry)); ++ ++ parse_fx_print_indent(parser); ++ fx_parse_fx_2_assignment(parser, &entry); ++ } ++ parse_fx_end_indent(parser); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "},\n"); ++ parse_fx_end_indent(parser); ++ } ++} ++ +static void fx_parse_fx_2_initial_value(struct fx_parser *parser, uint32_t param, uint32_t value) +{ + struct fx_2_var @@ -11434,8 +11732,7 @@ index bd7e7b420db..4fbce393707 100644 + if (var.class == D3DXPC_OBJECT) + { + if (is_fx_2_sampler(var.type)) -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Parsing sampler initializers is not supported."); ++ parse_fx_2_sampler(parser, var.element_count, value); + else + parse_fx_2_object_value(parser, var.element_count, var.type, value); + } @@ -11476,24 +11773,22 @@ index bd7e7b420db..4fbce393707 100644 + vkd3d_string_buffer_printf(&parser->buffer, ">"); +} + -+static void fx_parse_fx_2_assignment(struct fx_parser *parser) ++static void fx_parse_fx_2_assignment(struct fx_parser *parser, const struct fx_assignment *entry) +{ + const struct rhs_named_value *named_value = NULL; + const struct fx_2_state *state = NULL; -+ struct fx_assignment entry; + -+ fx_parser_read_u32s(parser, &entry, sizeof(entry)); -+ if (entry.id <= ARRAY_SIZE(fx_2_states)) ++ if (entry->id <= ARRAY_SIZE(fx_2_states)) + { -+ state = &fx_2_states[entry.id]; ++ state = &fx_2_states[entry->id]; + + vkd3d_string_buffer_printf(&parser->buffer, "%s", state->name); + if (state->array_size > 1) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry.lhs_index); ++ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry->lhs_index); + } + else + { -+ vkd3d_string_buffer_printf(&parser->buffer, "", entry.id); ++ vkd3d_string_buffer_printf(&parser->buffer, "", entry->id); + } + vkd3d_string_buffer_printf(&parser->buffer, " = "); + @@ -11502,7 +11797,7 @@ index bd7e7b420db..4fbce393707 100644 + const struct rhs_named_value *ptr = state->values; + uint32_t value; + -+ fx_parser_read_unstructured(parser, &value, entry.value, sizeof(value)); ++ fx_parser_read_unstructured(parser, &value, entry->value, sizeof(value)); + + while (ptr->name) + { @@ -11519,13 +11814,27 @@ index bd7e7b420db..4fbce393707 100644 + { + vkd3d_string_buffer_printf(&parser->buffer, "%s /* %u */", named_value->name, named_value->value); + } -+ else if (state && (state->type == FX_UINT || state->type == FX_FLOAT)) ++ else if (state) + { -+ uint32_t offset = entry.type; -+ unsigned int size; ++ if (state->type == FX_UINT || state->type == FX_FLOAT) ++ { ++ uint32_t offset = entry->type; ++ unsigned int size; + -+ size = fx_get_fx_2_type_size(parser, &offset); -+ parse_fx_2_numeric_value(parser, entry.value, size, entry.type); ++ size = fx_get_fx_2_type_size(parser, &offset); ++ parse_fx_2_numeric_value(parser, entry->value, size, entry->type); ++ } ++ else if (state->type == FX_VERTEXSHADER || state->type == FX_PIXELSHADER) ++ { ++ uint32_t id; ++ ++ fx_parser_read_unstructured(parser, &id, entry->value, sizeof(id)); ++ vkd3d_string_buffer_printf(&parser->buffer, "", id); ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, ""); ++ } + } + else + { @@ -11549,16 +11858,17 @@ index bd7e7b420db..4fbce393707 100644 + uint32_t assignment_count; + } pass; + const char *name; ++ uint32_t size; + + if (parser->failed) + return; + + fx_parser_read_u32s(parser, &technique, sizeof(technique)); + -+ name = fx_2_get_string(parser, technique.name); ++ name = fx_2_get_string(parser, technique.name, &size); + + parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "technique %s", name); ++ fx_print_string(&parser->buffer, "technique ", name, size); + fx_parse_fx_2_annotations(parser, technique.annotation_count); + + vkd3d_string_buffer_printf(&parser->buffer, "\n"); @@ -11569,10 +11879,10 @@ index bd7e7b420db..4fbce393707 100644 + for (uint32_t i = 0; i < technique.pass_count; ++i) + { + fx_parser_read_u32s(parser, &pass, sizeof(pass)); -+ name = fx_2_get_string(parser, pass.name); ++ name = fx_2_get_string(parser, pass.name, &size); + + parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name); ++ fx_print_string(&parser->buffer, "pass ", name, size); + fx_parse_fx_2_annotations(parser, pass.annotation_count); + + vkd3d_string_buffer_printf(&parser->buffer, "\n"); @@ -11582,8 +11892,11 @@ index bd7e7b420db..4fbce393707 100644 + parse_fx_start_indent(parser); + for (uint32_t j = 0; j < pass.assignment_count; ++j) + { ++ struct fx_assignment entry; ++ + parse_fx_print_indent(parser); -+ fx_parse_fx_2_assignment(parser); ++ fx_parser_read_u32s(parser, &entry, sizeof(entry)); ++ fx_parse_fx_2_assignment(parser, &entry); + } + parse_fx_end_indent(parser); + @@ -11620,10 +11933,62 @@ index bd7e7b420db..4fbce393707 100644 + vkd3d_string_buffer_printf(&parser->buffer, "\n"); +} + ++static void fx_parse_shader_blob(struct fx_parser *parser, enum vkd3d_shader_source_type source_type, ++ const void *data, uint32_t data_size) ++{ ++ struct vkd3d_shader_compile_info info = { 0 }; ++ struct vkd3d_shader_code output; ++ const char *p, *q, *end; ++ int ret; ++ ++ static const struct vkd3d_shader_compile_option options[] = ++ { ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, ++ }; ++ ++ info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; ++ info.source.code = data; ++ info.source.size = data_size; ++ info.source_type = source_type; ++ info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; ++ info.options = options; ++ info.option_count = ARRAY_SIZE(options); ++ info.log_level = VKD3D_SHADER_LOG_INFO; ++ ++ if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "Failed to disassemble shader blob."); ++ return; ++ } ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); ++ ++ parse_fx_start_indent(parser); ++ ++ end = (const char *)output.code + output.size; ++ for (p = output.code; p < end; p = q) ++ { ++ if (!(q = memchr(p, '\n', end - p))) ++ q = end; ++ else ++ ++q; ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); ++ } ++ ++ parse_fx_end_indent(parser); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}"); ++ ++ vkd3d_shader_free_shader_code(&output); ++} ++ +static void fx_parse_fx_2_data_blob(struct fx_parser *parser) +{ + uint32_t id, size; -+ const char *str; ++ const void *data; + + id = fx_parser_read_u32(parser); + size = fx_parser_read_u32(parser); @@ -11644,13 +12009,24 @@ index bd7e7b420db..4fbce393707 100644 + case D3DXPT_VERTEXSHADER: + vkd3d_string_buffer_printf(&parser->buffer, "%s object %u size %u bytes%s\n", + fx_2_types[type], id, size, size ? ":" : ","); -+ if (size && type == D3DXPT_STRING) ++ ++ if (size) + { -+ parse_fx_start_indent(parser); -+ parse_fx_print_indent(parser); -+ str = fx_parser_get_ptr(parser, size); -+ vkd3d_string_buffer_printf(&parser->buffer, "\"%.*s\"\n", size, str); -+ parse_fx_end_indent(parser); ++ data = fx_parser_get_ptr(parser, size); ++ ++ if (type == D3DXPT_STRING) ++ { ++ parse_fx_start_indent(parser); ++ parse_fx_print_indent(parser); ++ fx_print_string(&parser->buffer, "\"", (const char *)data, size); ++ vkd3d_string_buffer_printf(&parser->buffer, "\""); ++ parse_fx_end_indent(parser); ++ } ++ else if (type == D3DXPT_PIXELSHADER || type == D3DXPT_VERTEXSHADER) ++ { ++ fx_parse_shader_blob(parser, VKD3D_SHADER_SOURCE_D3D_BYTECODE, data, size); ++ } ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); + } + break; + default: @@ -11665,9 +12041,117 @@ index bd7e7b420db..4fbce393707 100644 + fx_parser_skip(parser, align(size, 4)); +} + ++static void fx_dump_blob(struct fx_parser *parser, const void *blob, uint32_t size) ++{ ++ const uint32_t *data = blob; ++ unsigned int i, j, n; ++ ++ size /= sizeof(*data); ++ i = 0; ++ while (i < size) ++ { ++ parse_fx_print_indent(parser); ++ n = min(size - i, 8); ++ for (j = 0; j < n; ++j) ++ vkd3d_string_buffer_printf(&parser->buffer, "0x%08x,", data[i + j]); ++ i += n; ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ } ++} ++ ++static void fx_parse_fx_2_array_selector(struct fx_parser *parser, uint32_t size) ++{ ++ const uint8_t *end = parser->ptr + size; ++ uint32_t name_size, blob_size = 0; ++ const void *blob = NULL; ++ const char *name; ++ ++ name_size = fx_parser_read_u32(parser); ++ name = fx_parser_get_ptr(parser, name_size); ++ fx_parser_skip(parser, name_size); ++ ++ if (!name || (uint8_t *)name >= end) ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "Malformed name entry in the array selector."); ++ ++ if (parser->ptr <= end) ++ { ++ blob_size = end - parser->ptr; ++ blob = fx_parser_get_ptr(parser, blob_size); ++ fx_parser_skip(parser, blob_size); ++ } ++ else ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "Malformed blob entry in the array selector."); ++ } ++ ++ if (name) ++ { ++ fx_print_string(&parser->buffer, "array \"", name, name_size); ++ vkd3d_string_buffer_printf(&parser->buffer, "\"\n"); ++ } ++ if (blob) ++ { ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "selector blob size %u\n", blob_size); ++ fx_dump_blob(parser, blob, blob_size); ++ } ++} ++ ++static void fx_parse_fx_2_complex_state(struct fx_parser *parser) ++{ ++ struct ++ { ++ uint32_t technique; ++ uint32_t index; ++ uint32_t element; ++ uint32_t state; ++ uint32_t assignment_type; ++ } state; ++ const char *data; ++ uint32_t size; ++ ++ fx_parser_read_u32s(parser, &state, sizeof(state)); ++ ++ if (state.technique == ~0u) ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, "parameter %u[%u], state %u =\n", ++ state.index, state.element, state.state); ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, "technique %u, pass %u, state %u =\n", ++ state.technique, state.index, state.state); ++ } ++ ++ size = fx_parser_read_u32(parser); ++ ++ parse_fx_print_indent(parser); ++ ++ if (state.assignment_type == FX_2_ASSIGNMENT_PARAMETER) ++ { ++ data = fx_parser_get_ptr(parser, size); ++ fx_print_string(&parser->buffer, "parameter \"", data, size); ++ vkd3d_string_buffer_printf(&parser->buffer, "\"\n"); ++ fx_parser_skip(parser, align(size, 4)); ++ } ++ else if (state.assignment_type == FX_2_ASSIGNMENT_ARRAY_SELECTOR) ++ { ++ fx_parse_fx_2_array_selector(parser, size); ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, "blob size %u\n", size); ++ data = fx_parser_get_ptr(parser, size); ++ fx_dump_blob(parser, data, size); ++ fx_parser_skip(parser, align(size, 4)); ++ } ++} ++ +static void fx_2_parse(struct fx_parser *parser) +{ -+ uint32_t i, size, parameter_count, technique_count, blob_count; ++ uint32_t i, size, parameter_count, technique_count, blob_count, state_count; + + fx_parser_skip(parser, sizeof(uint32_t)); /* Version */ + size = fx_parser_read_u32(parser); @@ -11693,13 +12177,20 @@ index bd7e7b420db..4fbce393707 100644 + fx_parse_fx_2_technique(parser); + + blob_count = fx_parser_read_u32(parser); -+ fx_parser_read_u32(parser); /* Resource count */ ++ state_count = fx_parser_read_u32(parser); + + vkd3d_string_buffer_printf(&parser->buffer, "object data {\n"); + parse_fx_start_indent(parser); + for (i = 0; i < blob_count; ++i) + fx_parse_fx_2_data_blob(parser); + parse_fx_end_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "state data {\n"); ++ parse_fx_start_indent(parser); ++ for (i = 0; i < state_count; ++i) ++ fx_parse_fx_2_complex_state(parser); ++ parse_fx_end_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "}\n"); +} + @@ -11762,24 +12253,92 @@ index bd7e7b420db..4fbce393707 100644 +} + +static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) ++{ ++ const char *str = fx_4_get_string(parser, offset); ++ vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); ++} ++ ++static void fx_parse_fx_4_annotations(struct fx_parser *parser) { - const char *str = fx_4_get_string(parser, offset); - vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); -@@ -3240,6 +4169,13 @@ static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, - }; + struct fx_4_annotation + { +@@ -3228,17 +4463,15 @@ static void fx_parse_buffers(struct fx_parser *parser) + static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) + { +- struct vkd3d_shader_compile_info info = { 0 }; +- struct vkd3d_shader_code output; + const void *data = NULL; +- const char *p, *q, *end; + uint32_t data_size; +- int ret; + +- static const struct vkd3d_shader_compile_option options[] = + if (!shader->offset) -+ { + { +- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, +- }; + parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "NULL"); + return; + } -+ + fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); if (data_size) - data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size); -@@ -3366,16 +4302,298 @@ static int fx_4_state_id_compare(const void *a, const void *b) +@@ -3247,42 +4480,8 @@ static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object + if (!data) + return; + +- info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; +- info.source.code = data; +- info.source.size = data_size; +- info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; +- info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; +- info.options = options; +- info.option_count = ARRAY_SIZE(options); +- info.log_level = VKD3D_SHADER_LOG_INFO; +- +- if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) +- { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, +- "Failed to disassemble shader blob."); +- return; +- } +- parse_fx_print_indent(parser); +- vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); +- +- parse_fx_start_indent(parser); +- +- end = (const char *)output.code + output.size; +- for (p = output.code; p < end; p = q) +- { +- if (!(q = memchr(p, '\n', end - p))) +- q = end; +- else +- ++q; +- +- parse_fx_print_indent(parser); +- vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); +- } +- +- parse_fx_end_indent(parser); ++ fx_parse_shader_blob(parser, VKD3D_SHADER_SOURCE_DXBC_TPF, data, data_size); + +- parse_fx_print_indent(parser); +- vkd3d_string_buffer_printf(&parser->buffer, "}"); + if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) + { + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", +@@ -3299,8 +4498,6 @@ static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object + if (shader->sodecl_count) + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); + } +- +- vkd3d_shader_free_shader_code(&output); + } + + static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +@@ -3366,16 +4563,298 @@ static int fx_4_state_id_compare(const void *a, const void *b) return id - state->id; } @@ -12085,7 +12644,7 @@ index bd7e7b420db..4fbce393707 100644 struct { uint32_t name; -@@ -3390,7 +4608,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 +@@ -3390,7 +4869,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 float f; }; } value; @@ -12094,7 +12653,7 @@ index bd7e7b420db..4fbce393707 100644 { [FX_BOOL] = "bool", [FX_FLOAT] = "float", -@@ -3496,6 +4714,19 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 +@@ -3496,6 +4975,19 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), fx_4_get_string(parser, index.index)); break; @@ -12114,7 +12673,7 @@ index bd7e7b420db..4fbce393707 100644 case FX_4_ASSIGNMENT_INLINE_SHADER: case FX_5_ASSIGNMENT_INLINE_SHADER: { -@@ -3544,12 +4775,14 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct +@@ -3544,12 +5036,14 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct }; unsigned int i, element_count, count; uint32_t value; @@ -12129,7 +12688,7 @@ index bd7e7b420db..4fbce393707 100644 for (i = 0; i < element_count; ++i) { switch (type->typeinfo) -@@ -3565,9 +4798,21 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct +@@ -3565,9 +5059,21 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct case FX_4_OBJECT_TYPE_SAMPLER_STATE: count = fx_parser_read_u32(parser); @@ -12151,7 +12710,7 @@ index bd7e7b420db..4fbce393707 100644 break; case FX_4_OBJECT_TYPE_PIXEL_SHADER: case FX_4_OBJECT_TYPE_VERTEX_SHADER: -@@ -3586,7 +4831,7 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct +@@ -3586,7 +5092,7 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct "Parsing object type %u is not implemented.", type->typeinfo); return; } @@ -12160,7 +12719,7 @@ index bd7e7b420db..4fbce393707 100644 } vkd3d_string_buffer_printf(&parser->buffer, "}"); } -@@ -3719,7 +4964,7 @@ static void fx_parse_groups(struct fx_parser *parser) +@@ -3719,7 +5225,7 @@ static void fx_parse_groups(struct fx_parser *parser) } } @@ -12169,7 +12728,7 @@ index bd7e7b420db..4fbce393707 100644 { struct fx_4_header { -@@ -3752,8 +4997,9 @@ static int fx_4_parse(struct fx_parser *parser) +@@ -3752,8 +5258,9 @@ static int fx_4_parse(struct fx_parser *parser) if (parser->end - parser->ptr < header.unstructured_size) { @@ -12181,7 +12740,7 @@ index bd7e7b420db..4fbce393707 100644 } parser->unstructured.ptr = parser->ptr; -@@ -3766,11 +5012,9 @@ static int fx_4_parse(struct fx_parser *parser) +@@ -3766,11 +5273,9 @@ static int fx_4_parse(struct fx_parser *parser) for (i = 0; i < header.technique_count; ++i) fx_parse_fx_4_technique(parser); @@ -12194,7 +12753,7 @@ index bd7e7b420db..4fbce393707 100644 { struct fx_5_header { -@@ -3808,8 +5052,9 @@ static int fx_5_parse(struct fx_parser *parser) +@@ -3808,8 +5313,9 @@ static int fx_5_parse(struct fx_parser *parser) if (parser->end - parser->ptr < header.unstructured_size) { @@ -12206,7 +12765,7 @@ index bd7e7b420db..4fbce393707 100644 } parser->unstructured.ptr = parser->ptr; -@@ -3821,48 +5066,62 @@ static int fx_5_parse(struct fx_parser *parser) +@@ -3821,48 +5327,62 @@ static int fx_5_parse(struct fx_parser *parser) fx_4_parse_objects(parser); fx_parse_groups(parser); @@ -12285,14 +12844,174 @@ index bd7e7b420db..4fbce393707 100644 + return VKD3D_ERROR_INVALID_SHADER; + return VKD3D_OK; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index ab6604bd703..a87ade5e467 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -64,7 +64,6 @@ struct vkd3d_glsl_generator + + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_descriptor_offset_info *offset_info; +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + }; + +@@ -130,7 +129,7 @@ static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info( + static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_descriptor_type type, unsigned int idx, unsigned int space) + { +- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; + + for (unsigned int i = 0; i < info->descriptor_count; ++i) + { +@@ -146,7 +145,7 @@ static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(st + static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor_by_id( + struct vkd3d_glsl_generator *gen, enum vkd3d_shader_descriptor_type type, unsigned int id) + { +- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; + + for (unsigned int i = 0; i < info->descriptor_count; ++i) + { +@@ -269,15 +268,15 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } +- if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) ++ if (reg->idx[0].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } +- vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]", +- gen->prefix, reg->idx[0].offset, reg->idx[2].offset); ++ vkd3d_string_buffer_printf(buffer, "%s_cb_%u", gen->prefix, reg->idx[0].offset); ++ shader_glsl_print_subscript(buffer, gen, reg->idx[2].rel_addr, reg->idx[2].offset); + break; + + case VKD3DSPR_THREADID: +@@ -485,8 +484,7 @@ static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, stru + vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer); + if (offset) + vkd3d_string_buffer_printf(buffer, " + %u", offset); +- else +- vkd3d_string_buffer_printf(buffer, "]"); ++ vkd3d_string_buffer_printf(buffer, "]"); + glsl_src_cleanup(&r, &gen->string_buffers); + } + +@@ -1298,7 +1296,7 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_POSITION index %u.", idx); + if (version->type == VKD3D_SHADER_TYPE_PIXEL) +- vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); ++ vkd3d_string_buffer_printf(buffer, "vec4(gl_FragCoord.xyz, 1.0 / gl_FragCoord.w)"); + else + vkd3d_string_buffer_printf(buffer, "gl_Position"); + break; +@@ -1658,6 +1656,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + case VKD3DSIH_SWITCH: + shader_glsl_switch(gen, ins); + break; ++ case VKD3DSIH_XOR: ++ shader_glsl_binop(gen, ins, "^"); ++ break; + default: + shader_glsl_unhandled(gen, ins); + break; +@@ -2078,7 +2079,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator + static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_generator *gen) + { + const struct vkd3d_shader_scan_combined_resource_sampler_info *sampler_info = gen->combined_sampler_info; +- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; + const struct vkd3d_shader_descriptor_info1 *descriptor; + unsigned int i; + +@@ -2429,7 +2430,6 @@ static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const stru + + static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + struct vkd3d_shader_message_context *message_context) + { +@@ -2453,12 +2453,10 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); + gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); +- gen->descriptor_info = descriptor_info; + gen->combined_sampler_info = combined_sampler_info; + } + + int glsl_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +@@ -2470,9 +2468,10 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + return ret; + + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); ++ VKD3D_ASSERT(program->has_descriptor_info); + + vkd3d_glsl_generator_init(&generator, program, compile_info, +- descriptor_info, combined_sampler_info, message_context); ++ combined_sampler_info, message_context); + ret = vkd3d_glsl_generator_generate(&generator, out); + vkd3d_glsl_generator_cleanup(&generator); + diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 41586550203..a7641a203f3 100644 +index 41586550203..d1d20b7384c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -298,6 +298,39 @@ bool hlsl_type_is_patch_array(const struct hlsl_type *type) +@@ -234,6 +234,33 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) + return 1; + } + ++const struct hlsl_type *hlsl_get_stream_output_type(const struct hlsl_type *type) ++{ ++ unsigned int i; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ return hlsl_get_stream_output_type(type->e.array.type); ++ ++ case HLSL_CLASS_STRUCT: ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ const struct hlsl_type *field_type = hlsl_get_stream_output_type(type->e.record.fields[i].type); ++ ++ if (field_type) ++ return field_type; ++ } ++ return NULL; ++ ++ case HLSL_CLASS_STREAM_OUTPUT: ++ return type; ++ ++ default: ++ return NULL; ++ } ++} ++ + bool hlsl_type_is_resource(const struct hlsl_type *type) + { + switch (type->class) +@@ -298,6 +325,45 @@ bool hlsl_type_is_patch_array(const struct hlsl_type *type) || type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT); } ++bool hlsl_type_is_primitive_array(const struct hlsl_type *type) ++{ ++ return type->class == HLSL_CLASS_ARRAY && (type->e.array.array_type != HLSL_ARRAY_GENERIC ++ || (type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK)); ++} ++ +bool hlsl_base_type_is_integer(enum hlsl_base_type type) +{ + switch (type) @@ -12329,7 +13048,7 @@ index 41586550203..a7641a203f3 100644 /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or * resources, since for both their data types span across a single regset. */ static enum hlsl_regset type_get_regset(const struct hlsl_type *type) -@@ -484,6 +517,8 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e +@@ -484,6 +550,8 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e { struct hlsl_type *type; @@ -12338,7 +13057,35 @@ index 41586550203..a7641a203f3 100644 if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; if (!(type->name = hlsl_strdup(ctx, name))) -@@ -845,13 +880,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -704,8 +772,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + return offset[*regset]; + } + +-static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, +- unsigned int path_len) ++bool hlsl_init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) + { + deref->var = var; + deref->path_len = path_len; +@@ -763,7 +830,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d + } + load = hlsl_ir_load(ptr); + +- if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) ++ if (!hlsl_init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) + return false; + + for (i = 0; i < load->src.path_len; ++i) +@@ -832,7 +899,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl + ++path_len; + } + +- if (!init_deref(ctx, deref, prefix->var, prefix->path_len + path_len)) ++ if (!hlsl_init_deref(ctx, deref, prefix->var, prefix->path_len + path_len)) + return false; + + deref_path_len = 0; +@@ -845,13 +912,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl { unsigned int next_index = traverse_path_from_component_index(ctx, &path_type, &path_index); @@ -12353,7 +13100,23 @@ index 41586550203..a7641a203f3 100644 hlsl_src_from_node(&deref->path[deref_path_len++], c); } -@@ -1324,15 +1353,16 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) +@@ -1104,6 +1165,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + return 1; + +@@ -1111,7 +1173,6 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: +- case HLSL_CLASS_STREAM_OUTPUT: + break; + } + +@@ -1324,15 +1385,16 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) return true; } @@ -12377,7 +13140,25 @@ index 41586550203..a7641a203f3 100644 } struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) -@@ -1510,22 +1540,36 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls +@@ -1429,7 +1491,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc + + VKD3D_ASSERT(!hlsl_deref_is_lowered(other)); + +- if (!init_deref(ctx, deref, other->var, other->path_len)) ++ if (!hlsl_init_deref(ctx, deref, other->var, other->path_len)) + return false; + + for (i = 0; i < deref->path_len; ++i) +@@ -1491,7 +1553,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls + return NULL; + init_node(&store->node, HLSL_IR_STORE, NULL, loc); + +- if (!init_deref(ctx, &store->lhs, lhs->var, lhs->path_len + !!idx)) ++ if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, lhs->path_len + !!idx)) + { + vkd3d_free(store); + return NULL; +@@ -1510,22 +1572,73 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls return &store->node; } @@ -12398,6 +13179,43 @@ index 41586550203..a7641a203f3 100644 + hlsl_block_add_store_index(ctx, block, &lhs_deref, NULL, rhs, 0, &rhs->loc); +} + ++static struct hlsl_ir_node *hlsl_new_store_parent(struct hlsl_ctx *ctx, ++ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, ++ unsigned int writemask, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_store *store; ++ ++ VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); ++ VKD3D_ASSERT(lhs->path_len >= path_len); ++ ++ if (!(store = hlsl_alloc(ctx, sizeof(*store)))) ++ return NULL; ++ init_node(&store->node, HLSL_IR_STORE, NULL, loc); ++ ++ if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, path_len)) ++ { ++ vkd3d_free(store); ++ return NULL; ++ } ++ for (unsigned int i = 0; i < path_len; ++i) ++ hlsl_src_from_node(&store->lhs.path[i], lhs->path[i].node); ++ ++ hlsl_src_from_node(&store->rhs, rhs); ++ ++ if (!writemask && type_is_single_reg(rhs->data_type)) ++ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; ++ store->writemask = writemask; ++ ++ return &store->node; ++} ++ ++void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, ++ unsigned int writemask, const struct vkd3d_shader_location *loc) ++{ ++ append_new_instr(ctx, block, hlsl_new_store_parent(ctx, lhs, path_len, rhs, writemask, loc)); ++} ++ +void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) { @@ -12419,7 +13237,7 @@ index 41586550203..a7641a203f3 100644 } hlsl_block_add_block(block, &comp_path_block); hlsl_src_from_node(&store->rhs, rhs); -@@ -1534,8 +1578,6 @@ bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -1534,8 +1647,6 @@ bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, store->writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; hlsl_block_add_instr(block, &store->node); @@ -12428,7 +13246,7 @@ index 41586550203..a7641a203f3 100644 } struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, -@@ -1575,7 +1617,7 @@ struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const +@@ -1575,7 +1686,7 @@ struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &value, loc); } @@ -12437,7 +13255,7 @@ index 41586550203..a7641a203f3 100644 const struct vkd3d_shader_location *loc) { struct hlsl_constant_value value; -@@ -1584,7 +1626,14 @@ struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, +@@ -1584,7 +1695,14 @@ struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), &value, loc); } @@ -12453,7 +13271,7 @@ index 41586550203..a7641a203f3 100644 { struct hlsl_constant_value value; -@@ -1592,6 +1641,12 @@ struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, cons +@@ -1592,6 +1710,12 @@ struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, cons return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); } @@ -12466,7 +13284,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc) { -@@ -1601,6 +1656,12 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n +@@ -1601,6 +1725,12 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); } @@ -12479,7 +13297,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, const struct vkd3d_shader_location *loc) { -@@ -1625,7 +1686,7 @@ struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct v +@@ -1625,7 +1755,7 @@ struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct v return hlsl_new_constant(ctx, ctx->builtin_types.null, &value, loc); } @@ -12488,7 +13306,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) { -@@ -1641,7 +1702,14 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op +@@ -1641,7 +1771,14 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op return &expr->node; } @@ -12504,7 +13322,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg}; -@@ -1649,6 +1717,12 @@ struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr +@@ -1649,6 +1786,12 @@ struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr return hlsl_new_expr(ctx, op, operands, arg->data_type, loc); } @@ -12517,7 +13335,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2) { -@@ -1657,16 +1731,37 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp +@@ -1657,16 +1800,37 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -12557,7 +13375,7 @@ index 41586550203..a7641a203f3 100644 static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx) { static const struct vkd3d_shader_location loc = {.source_name = ""}; -@@ -1694,6 +1789,23 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond +@@ -1694,6 +1858,23 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond return &iff->node; } @@ -12581,7 +13399,16 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned int value, bool is_default, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { -@@ -1758,6 +1870,14 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl +@@ -1745,7 +1926,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl + return NULL; + init_node(&load->node, HLSL_IR_LOAD, type, loc); + +- if (!init_deref(ctx, &load->src, deref->var, deref->path_len + !!idx)) ++ if (!hlsl_init_deref(ctx, &load->src, deref->var, deref->path_len + !!idx)) + { + vkd3d_free(load); + return NULL; +@@ -1758,6 +1939,14 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; } @@ -12596,7 +13423,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) { -@@ -1780,17 +1900,27 @@ struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -1780,17 +1969,27 @@ struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var return hlsl_new_load_index(ctx, &var_deref, NULL, loc); } @@ -12628,7 +13455,7 @@ index 41586550203..a7641a203f3 100644 type = hlsl_deref_get_type(ctx, deref); comp_type = hlsl_type_get_component_type(ctx, type, comp); -@@ -1799,7 +1929,8 @@ struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1799,7 +1998,8 @@ struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b if (!init_deref_from_component_index(ctx, &comp_path_block, &load->src, deref, comp, loc)) { vkd3d_free(load); @@ -12638,16 +13465,31 @@ index 41586550203..a7641a203f3 100644 } hlsl_block_add_block(block, &comp_path_block); -@@ -1847,7 +1978,7 @@ struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, +@@ -1808,7 +2008,7 @@ struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b + return &load->node; + } + +-struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, ++static struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, + const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_resource_load *load; +@@ -1847,7 +2047,13 @@ struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, return &load->node; } -struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, ++struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) ++{ ++ return append_new_instr(ctx, block, hlsl_new_resource_load(ctx, params, loc)); ++} ++ +static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_store *store; -@@ -1861,12 +1992,21 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct +@@ -1861,12 +2067,21 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct return &store->node; } @@ -12669,7 +13511,20 @@ index 41586550203..a7641a203f3 100644 VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR); if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) -@@ -2078,7 +2218,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) +@@ -1882,6 +2097,12 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned + return &swizzle->node; + } + ++struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, ++ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) ++{ ++ return append_new_instr(ctx, block, hlsl_new_swizzle(ctx, s, width, val, loc)); ++} ++ + struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, + unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) + { +@@ -2078,7 +2299,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) return false; } @@ -12678,7 +13533,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { struct hlsl_type *type = val->data_type; -@@ -2100,7 +2240,13 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v +@@ -2100,7 +2321,13 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v return &index->node; } @@ -12693,7 +13548,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump; -@@ -2113,7 +2259,13 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type +@@ -2113,7 +2340,13 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return &jump->node; } @@ -12708,7 +13563,7 @@ index 41586550203..a7641a203f3 100644 struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { -@@ -2134,6 +2286,18 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter +@@ -2134,6 +2367,18 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter return &loop->node; } @@ -12727,7 +13582,16 @@ index 41586550203..a7641a203f3 100644 struct clone_instr_map { struct -@@ -2650,8 +2814,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, +@@ -2203,7 +2448,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, + + VKD3D_ASSERT(!hlsl_deref_is_lowered(src)); + +- if (!init_deref(ctx, dst, src->var, src->path_len)) ++ if (!hlsl_init_deref(ctx, dst, src->var, src->path_len)) + return false; + + for (i = 0; i < src->path_len; ++i) +@@ -2650,8 +2895,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) { @@ -12737,7 +13601,7 @@ index 41586550203..a7641a203f3 100644 if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) return NULL; -@@ -2679,9 +2843,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, +@@ -2679,9 +2924,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, return decl; hlsl_block_add_instr(&decl->body, constant); @@ -12748,7 +13612,7 @@ index 41586550203..a7641a203f3 100644 return decl; } -@@ -2796,6 +2958,7 @@ static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl +@@ -2796,6 +3039,7 @@ static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl [HLSL_TYPE_HALF] = "half", [HLSL_TYPE_DOUBLE] = "double", [HLSL_TYPE_INT] = "int", @@ -12756,7 +13620,24 @@ index 41586550203..a7641a203f3 100644 [HLSL_TYPE_UINT] = "uint", [HLSL_TYPE_BOOL] = "bool", }; -@@ -3263,6 +3426,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl +@@ -3040,6 +3284,16 @@ struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, uint3 + vkd3d_string_buffer_printf(string, "row_major "); + if (modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + vkd3d_string_buffer_printf(string, "column_major "); ++ if (modifiers & HLSL_PRIMITIVE_POINT) ++ vkd3d_string_buffer_printf(string, "point "); ++ if (modifiers & HLSL_PRIMITIVE_LINE) ++ vkd3d_string_buffer_printf(string, "line "); ++ if (modifiers & HLSL_PRIMITIVE_TRIANGLE) ++ vkd3d_string_buffer_printf(string, "triangle "); ++ if (modifiers & HLSL_PRIMITIVE_LINEADJ) ++ vkd3d_string_buffer_printf(string, "lineadj "); ++ if (modifiers & HLSL_PRIMITIVE_TRIANGLEADJ) ++ vkd3d_string_buffer_printf(string, "triangleadj "); + if ((modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT)) == (HLSL_STORAGE_IN | HLSL_STORAGE_OUT)) + vkd3d_string_buffer_printf(string, "inout "); + else if (modifiers & HLSL_STORAGE_IN) +@@ -3263,6 +3517,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl vkd3d_string_buffer_printf(buffer, "%d ", value->i); break; @@ -12764,7 +13645,7 @@ index 41586550203..a7641a203f3 100644 case HLSL_TYPE_UINT: vkd3d_string_buffer_printf(buffer, "%u ", value->u); break; -@@ -4289,17 +4453,17 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) +@@ -4289,17 +4544,17 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) static const char * const names[] = { @@ -12789,7 +13670,7 @@ index 41586550203..a7641a203f3 100644 static const char *const sampler_names[] = { -@@ -4390,11 +4554,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) +@@ -4390,11 +4645,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) n_variants = ARRAY_SIZE(variants_int); break; @@ -12801,8 +13682,25 @@ index 41586550203..a7641a203f3 100644 default: n_variants = 0; variants = NULL; +@@ -4577,6 +4827,8 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil + ctx->output_primitive = 0; + ctx->partitioning = 0; + ctx->input_control_point_count = UINT_MAX; ++ ctx->max_vertex_count = 0; ++ ctx->input_primitive_type = VKD3D_PT_UNDEFINED; + + return true; + } +@@ -4742,6 +4994,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + + if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY + || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT ++ || target_type == VKD3D_SHADER_TARGET_GLSL + || target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { + uint64_t config_flags = vkd3d_shader_init_config_flags(); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index f614e12036e..2ef84d35ff2 100644 +index f614e12036e..fafa5740963 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -103,6 +103,7 @@ enum hlsl_base_type @@ -12813,7 +13711,29 @@ index f614e12036e..2ef84d35ff2 100644 HLSL_TYPE_BOOL, HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, }; -@@ -482,6 +483,9 @@ struct hlsl_ir_var +@@ -416,6 +417,11 @@ struct hlsl_attribute + #define HLSL_STORAGE_ANNOTATION 0x00080000 + #define HLSL_MODIFIER_UNORM 0x00100000 + #define HLSL_MODIFIER_SNORM 0x00200000 ++#define HLSL_PRIMITIVE_POINT 0x00400000 ++#define HLSL_PRIMITIVE_LINE 0x00800000 ++#define HLSL_PRIMITIVE_TRIANGLE 0x01000000 ++#define HLSL_PRIMITIVE_LINEADJ 0x02000000 ++#define HLSL_PRIMITIVE_TRIANGLEADJ 0x04000000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +@@ -426,6 +432,9 @@ struct hlsl_attribute + + #define HLSL_MODIFIERS_MAJORITY_MASK (HLSL_MODIFIER_ROW_MAJOR | HLSL_MODIFIER_COLUMN_MAJOR) + ++#define HLSL_PRIMITIVE_MODIFIERS_MASK (HLSL_PRIMITIVE_POINT | HLSL_PRIMITIVE_LINE | HLSL_PRIMITIVE_TRIANGLE | \ ++ HLSL_PRIMITIVE_LINEADJ | HLSL_PRIMITIVE_TRIANGLEADJ) ++ + #define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0 + + /* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a +@@ -482,6 +491,9 @@ struct hlsl_ir_var union hlsl_constant_value_component number; } *default_values; @@ -12823,7 +13743,46 @@ index f614e12036e..2ef84d35ff2 100644 /* A dynamic array containing the state block on the variable's declaration, if any. * An array variable may contain multiple state blocks. * A technique pass will always contain one. -@@ -1502,6 +1506,45 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -1143,6 +1155,7 @@ struct hlsl_ctx + struct hlsl_constant_register + { + uint32_t index; ++ uint32_t allocated_mask; + struct hlsl_vec4 value; + struct vkd3d_shader_location loc; + } *regs; +@@ -1180,10 +1193,18 @@ struct hlsl_ctx + unsigned int input_control_point_count; + struct hlsl_type *input_control_point_type; + ++ /* The first declared input primitive parameter in tessellation and geometry shaders. */ ++ struct hlsl_ir_var *input_primitive_param; ++ + /* Whether the current function being processed during HLSL codegen is + * the patch constant function in a hull shader. */ + bool is_patch_constant_func; + ++ /* The maximum output vertex count of a geometry shader. */ ++ unsigned int max_vertex_count; ++ /* The input primitive type of a geometry shader. */ ++ enum vkd3d_primitive_type input_primitive_type; ++ + /* In some cases we generate opcodes by parsing an HLSL function and then + * invoking it. If not NULL, this field is the name of the function that we + * are currently parsing, "mangled" with an internal prefix to avoid +@@ -1454,6 +1475,11 @@ static inline bool hlsl_is_numeric_type(const struct hlsl_type *type) + return type->class <= HLSL_CLASS_LAST_NUMERIC; + } + ++static inline bool hlsl_is_vec1(const struct hlsl_type *type) ++{ ++ return type->class == HLSL_CLASS_SCALAR || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); ++} ++ + static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) + { + switch (dim) +@@ -1502,6 +1528,52 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); void hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl); @@ -12851,6 +13810,8 @@ index f614e12036e..2ef84d35ff2 100644 +void hlsl_block_add_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_block *iter, struct hlsl_block *body, enum hlsl_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); +void hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *resource, + struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_simple_load(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -12862,6 +13823,11 @@ index f614e12036e..2ef84d35ff2 100644 +void hlsl_block_add_store_index(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, + unsigned int writemask, const struct vkd3d_shader_location *loc); ++void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, ++ unsigned int writemask, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, ++ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + unsigned int n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -12869,7 +13835,15 @@ index f614e12036e..2ef84d35ff2 100644 void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); -@@ -1573,19 +1616,11 @@ struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *no +@@ -1524,6 +1596,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); + int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); + ++bool hlsl_init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len); + bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); + bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); + +@@ -1573,19 +1646,11 @@ struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *no struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); @@ -12889,7 +13863,7 @@ index f614e12036e..2ef84d35ff2 100644 struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -@@ -1599,16 +1634,12 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl +@@ -1599,16 +1664,12 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc); @@ -12906,7 +13880,7 @@ index f614e12036e..2ef84d35ff2 100644 bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); -@@ -1617,20 +1648,13 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); +@@ -1617,20 +1678,11 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); @@ -12920,14 +13894,14 @@ index f614e12036e..2ef84d35ff2 100644 - unsigned int unroll_limit, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, +- const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, - struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, -@@ -1653,8 +1677,6 @@ struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *forma +@@ -1653,8 +1705,6 @@ struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *forma struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc); @@ -12936,7 +13910,7 @@ index f614e12036e..2ef84d35ff2 100644 struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, uint32_t modifiers, const struct hlsl_reg_reservation *reg_reservation); -@@ -1677,6 +1699,8 @@ void hlsl_pop_scope(struct hlsl_ctx *ctx); +@@ -1677,6 +1727,8 @@ void hlsl_pop_scope(struct hlsl_ctx *ctx); bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type); @@ -12945,7 +13919,7 @@ index f614e12036e..2ef84d35ff2 100644 struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, unsigned int default_majority, uint32_t modifiers); unsigned int hlsl_type_component_count(const struct hlsl_type *type); -@@ -1685,10 +1709,13 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl +@@ -1685,13 +1737,17 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl unsigned int index); unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, unsigned int index, enum hlsl_regset *regset); @@ -12959,8 +13933,51 @@ index f614e12036e..2ef84d35ff2 100644 bool hlsl_type_is_resource(const struct hlsl_type *type); bool hlsl_type_is_shader(const struct hlsl_type *type); bool hlsl_type_is_patch_array(const struct hlsl_type *type); ++bool hlsl_type_is_primitive_array(const struct hlsl_type *type); + unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); + bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); + +@@ -1700,6 +1756,8 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx); + const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); + unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); + ++const struct hlsl_type *hlsl_get_stream_output_type(const struct hlsl_type *type); ++ + uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim); + unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); + uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index 605a9abaa93..d9fd43b5e78 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -106,6 +106,8 @@ inline {return KW_INLINE; } + inout {return KW_INOUT; } + InputPatch {return KW_INPUTPATCH; } + LineStream {return KW_LINESTREAM; } ++line {return KW_LINE; } ++lineadj {return KW_LINEADJ; } + linear {return KW_LINEAR; } + matrix {return KW_MATRIX; } + namespace {return KW_NAMESPACE; } +@@ -119,6 +121,7 @@ pass {return KW_PASS; } + PixelShader {return KW_PIXELSHADER; } + PointStream {return KW_POINTSTREAM; } + pixelshader {return KW_PIXELSHADER; } ++point {return KW_POINT; } + RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } + RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } + RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } +@@ -175,6 +178,8 @@ TextureCube {return KW_TEXTURECUBE; } + textureCUBE {return KW_TEXTURECUBE; } + TextureCubeArray {return KW_TEXTURECUBEARRAY; } + TriangleStream {return KW_TRIANGLESTREAM; } ++triangle {return KW_TRIANGLE; } ++triangleadj {return KW_TRIANGLEADJ; } + true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } + unsigned {return KW_UNSIGNED; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 7afc9274c2e..71802fce388 100644 +index 7afc9274c2e..ff3d58da8f4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -351,7 +351,6 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl @@ -12984,22 +14001,95 @@ index 7afc9274c2e..71802fce388 100644 } static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -456,7 +451,7 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t +@@ -377,7 +372,15 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + if (node->type == HLSL_IR_SAMPLER_STATE && dst_type->class == HLSL_CLASS_SAMPLER) + return node; - static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) +- if (!implicit_compatible_data_types(ctx, src_type, dst_type)) ++ if (implicit_compatible_data_types(ctx, src_type, dst_type)) ++ { ++ if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) ++ && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy ++ && ctx->warn_implicit_truncation) ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", ++ src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); ++ } ++ else + { + struct vkd3d_string_buffer *src_string, *dst_string; + +@@ -388,19 +391,12 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + "Can't implicitly convert from %s to %s.", src_string->buffer, dst_string->buffer); + hlsl_release_string_buffer(ctx, src_string); + hlsl_release_string_buffer(ctx, dst_string); +- return NULL; + } + +- if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) +- && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy +- && ctx->warn_implicit_truncation) +- hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", +- src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); +- + return add_cast(ctx, block, node, dst_type, loc); + } + +-static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, ++static void add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *dst_type, const struct parse_array_sizes *arrays, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *instr = node_from_block(block); +@@ -419,7 +415,7 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo + } + + if (instr->data_type->class == HLSL_CLASS_ERROR) +- return true; ++ return; + + if (!explicit_compatible_data_types(ctx, src_type, dst_type)) + { +@@ -432,10 +428,9 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo + src_string->buffer, dst_string->buffer); + hlsl_release_string_buffer(ctx, src_string); + hlsl_release_string_buffer(ctx, dst_string); +- return false; + } + +- return add_cast(ctx, block, instr, dst_type, loc); ++ add_cast(ctx, block, instr, dst_type, loc); + } + + static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, +@@ -454,15 +449,15 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t + return modifiers | mod; + } + +-static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) ++static void append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { - struct hlsl_ir_node *condition, *cast, *not, *iff, *jump; + struct hlsl_ir_node *condition, *cast, *not; struct hlsl_block then_block; struct hlsl_type *bool_type; -@@ -474,19 +469,11 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - if (!(cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc))) - return false; + /* E.g. "for (i = 0; ; ++i)". */ + if (list_empty(&cond_block->instrs)) +- return true; ++ return; + condition = node_from_block(cond_block); + +@@ -471,23 +466,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co + bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); + /* We already checked for a 1-component numeric type, so + * add_implicit_conversion() is equivalent to add_cast() here. */ +- if (!(cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc))) +- return false; +- - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) - return false; - hlsl_block_add_instr(cond_block, not); ++ cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc); + not = hlsl_block_add_unary_expr(ctx, cond_block, HLSL_OP1_LOGIC_NOT, cast, &condition->loc); hlsl_block_init(&then_block); @@ -13011,12 +14101,39 @@ index 7afc9274c2e..71802fce388 100644 - if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) - return false; - hlsl_block_add_instr(cond_block, iff); +- return true; + hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &condition->loc); + hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, &condition->loc); - return true; } -@@ -668,7 +655,6 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type + static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) +@@ -531,11 +515,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block + { + if (!hlsl_clone_block(ctx, &cond_block, cond)) + return; +- if (!append_conditional_break(ctx, &cond_block)) +- { +- hlsl_block_cleanup(&cond_block); +- return; +- } ++ append_conditional_break(ctx, &cond_block); + list_move_before(&instr->entry, &cond_block.instrs); + } + } +@@ -611,11 +591,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + return ret; + hlsl_block_add_block(&expr, block); + +- if (!(node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc))) +- { +- hlsl_block_cleanup(&expr); +- return ret; +- } ++ node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc); + + /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ + hlsl_src_from_node(&src, node); +@@ -668,7 +644,6 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type { enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL; unsigned int i, unroll_limit = 0; @@ -13024,7 +14141,16 @@ index 7afc9274c2e..71802fce388 100644 check_attribute_list_for_duplicates(ctx, attributes); check_loop_attributes(ctx, attributes, loc); -@@ -727,9 +713,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type +@@ -719,17 +694,14 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type + if (!init && !(init = make_empty_block(ctx))) + goto oom; + +- if (!append_conditional_break(ctx, cond)) +- goto oom; ++ append_conditional_break(ctx, cond); + + if (type == HLSL_LOOP_DO_WHILE) + list_move_tail(&body->instrs, &cond->instrs); else list_move_head(&body->instrs, &cond->instrs); @@ -13035,7 +14161,7 @@ index 7afc9274c2e..71802fce388 100644 destroy_block(cond); destroy_block(body); -@@ -858,23 +842,18 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -858,23 +830,16 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; @@ -13050,17 +14176,18 @@ index 7afc9274c2e..71802fce388 100644 if (return_value->data_type->class == HLSL_CLASS_ERROR) return true; - if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) - return false; - +- if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) +- return false; +- - if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) - return false; - list_add_after(&return_value->entry, &store->entry); ++ return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc); + hlsl_block_add_simple_store(ctx, block, ctx->cur_function->return_var, return_value); } else { -@@ -888,52 +867,37 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -888,52 +853,37 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); } @@ -13124,7 +14251,7 @@ index 7afc9274c2e..71802fce388 100644 } static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -944,7 +908,6 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str +@@ -944,7 +894,6 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; @@ -13132,19 +14259,25 @@ index 7afc9274c2e..71802fce388 100644 if (array->data_type->class == HLSL_CLASS_ERROR || index->data_type->class == HLSL_CLASS_ERROR) { -@@ -972,10 +935,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) +@@ -968,14 +917,9 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str return false; + } +- if (!(index = add_implicit_conversion(ctx, block, index, +- hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) +- return false; +- - if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; - hlsl_block_add_instr(block, return_index); - ++ index = add_implicit_conversion(ctx, block, index, ++ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc); + hlsl_block_add_index(ctx, block, array, index, loc); return true; } -@@ -985,10 +945,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str +@@ -985,10 +929,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str return false; } @@ -13156,7 +14289,7 @@ index 7afc9274c2e..71802fce388 100644 if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) { -@@ -999,10 +956,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str +@@ -999,10 +940,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str return false; } @@ -13168,7 +14301,7 @@ index 7afc9274c2e..71802fce388 100644 return true; } -@@ -1144,31 +1098,34 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -1144,31 +1082,34 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, return true; } @@ -13212,7 +14345,43 @@ index 7afc9274c2e..71802fce388 100644 } static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) -@@ -1517,7 +1474,11 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl +@@ -1239,6 +1180,14 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, + return true; + } + ++static void check_invalid_stream_output_object(struct hlsl_ctx *ctx, const struct hlsl_type *type, ++ const char *name, const struct vkd3d_shader_location* loc) ++{ ++ if (hlsl_type_component_count(type) != 1) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Stream output object '%s' is not single-element.", name); ++} ++ + static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, + bool is_default_values_initializer); +@@ -1273,6 +1222,9 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Output parameter '%s' has a default value.", param->name); + ++ if (hlsl_get_stream_output_type(param->type)) ++ check_invalid_stream_output_object(ctx, param->type, param->name, loc); ++ + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, + ¶m->reg_reservation))) + return false; +@@ -1289,9 +1241,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + + if (!param->initializer.braces) + { +- if (!(add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc))) +- return false; +- ++ add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc); + param->initializer.args[0] = node_from_block(param->initializer.instrs); + } + +@@ -1517,7 +1467,11 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl return HLSL_TYPE_FLOAT; if (t1 == HLSL_TYPE_UINT || t2 == HLSL_TYPE_UINT) return HLSL_TYPE_UINT; @@ -13225,7 +14394,7 @@ index 7afc9274c2e..71802fce388 100644 } static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, -@@ -1600,15 +1561,12 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl +@@ -1600,15 +1554,12 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *type, const struct vkd3d_shader_location *loc) { @@ -13241,7 +14410,7 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_ir_var *var; scalar_type = hlsl_get_scalar_type(ctx, type->e.numeric.type); -@@ -1620,58 +1578,24 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl +@@ -1620,58 +1571,24 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl for (i = 0; i < type->e.numeric.dimy * type->e.numeric.dimx; ++i) { struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; @@ -13304,7 +14473,7 @@ index 7afc9274c2e..71802fce388 100644 } static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) -@@ -1679,7 +1603,7 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * +@@ -1679,7 +1596,7 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * const struct hlsl_type *type = instr->data_type; struct vkd3d_string_buffer *string; @@ -13313,7 +14482,136 @@ index 7afc9274c2e..71802fce388 100644 return; if ((string = hlsl_type_to_string(ctx, type))) -@@ -2186,7 +2110,6 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -1721,10 +1638,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, + arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); +- +- if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) +- return NULL; +- ++ args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc); + return add_expr(ctx, block, op, args, bool_type, loc); + } + +@@ -1754,12 +1668,8 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str + return block->value; + } + +- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) +- return NULL; +- +- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) +- return NULL; +- ++ args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc); ++ args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc); + return add_expr(ctx, block, op, args, common_type, loc); + } + +@@ -1790,12 +1700,8 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str + common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) +- return NULL; +- +- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) +- return NULL; +- ++ args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc); ++ args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc); + return add_expr(ctx, block, op, args, return_type, loc); + } + +@@ -1813,12 +1719,8 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct + + common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) +- return NULL; +- +- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) +- return NULL; +- ++ args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc); ++ args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc); + return add_expr(ctx, block, op, args, common_type, loc); + } + +@@ -1844,12 +1746,8 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h + return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) +- return NULL; +- +- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) +- return NULL; +- ++ args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc); ++ args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc); + return add_expr(ctx, block, op, args, return_type, loc); + } + +@@ -1897,12 +1795,8 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls + common_type = hlsl_get_vector_type(ctx, base, dim); + ret_type = hlsl_get_scalar_type(ctx, base); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) +- return NULL; +- +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) +- return NULL; +- ++ args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc); ++ args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc); + return add_expr(ctx, instrs, op, args, ret_type, loc); + } + +@@ -2098,8 +1992,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + width = size; + } + +- if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) +- return false; ++ rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc); + + while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) + { +@@ -2129,7 +2022,6 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + else if (lhs->type == HLSL_IR_SWIZZLE) + { + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); +- struct hlsl_ir_node *new_swizzle; + uint32_t s; + + VKD3D_ASSERT(!matrix_writemask); +@@ -2160,13 +2052,9 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + } + } + +- if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) +- return false; +- hlsl_block_add_instr(block, new_swizzle); +- ++ rhs = hlsl_block_add_swizzle(ctx, block, s, width, rhs, &swizzle->node.loc); + lhs = swizzle->val.node; + lhs_type = hlsl_get_vector_type(ctx, lhs_type->e.numeric.type, width); +- rhs = new_swizzle; + } + else + { +@@ -2178,15 +2066,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + /* lhs casts could have resulted in a discrepancy between the + * rhs->data_type and the type of the variable that will be ulimately + * stored to. This is corrected. */ +- if (!(rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc))) +- return false; ++ rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc); + + if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) + { struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; struct hlsl_deref resource_deref; struct hlsl_type *resource_type; @@ -13321,7 +14619,7 @@ index 7afc9274c2e..71802fce388 100644 unsigned int dim_count; if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) -@@ -2215,12 +2138,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2215,12 +2101,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); @@ -13335,7 +14633,7 @@ index 7afc9274c2e..71802fce388 100644 hlsl_cleanup_deref(&resource_deref); } else if (matrix_writemask) -@@ -2235,25 +2153,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2235,25 +2116,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc for (j = 0; j < lhs->data_type->e.numeric.dimx; ++j) { struct hlsl_ir_node *load; @@ -13363,7 +14661,7 @@ index 7afc9274c2e..71802fce388 100644 } } -@@ -2269,49 +2176,32 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2269,49 +2139,32 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) { @@ -13419,7 +14717,7 @@ index 7afc9274c2e..71802fce388 100644 hlsl_cleanup_deref(&deref); } -@@ -2332,9 +2222,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d +@@ -2332,9 +2185,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); @@ -13430,7 +14728,7 @@ index 7afc9274c2e..71802fce388 100644 if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one, false)) return false; -@@ -2371,8 +2259,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i +@@ -2371,8 +2222,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i struct hlsl_type *dst_comp_type; struct hlsl_block block; @@ -13440,18 +14738,83 @@ index 7afc9274c2e..71802fce388 100644 dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); -@@ -2441,9 +2328,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) - return; - +@@ -2438,12 +2288,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + } + else + { +- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) +- return; +- - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; - hlsl_block_add_block(instrs, &block); ++ conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc); + hlsl_block_add_store_component(ctx, instrs, &dst_deref, *store_index, conv); } } -@@ -2859,7 +2744,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2516,10 +2362,10 @@ static bool type_has_numeric_components(struct hlsl_type *type) + return false; + } + +-static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, ++static void check_invalid_non_parameter_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, + const struct vkd3d_shader_location *loc) + { +- modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); ++ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT | HLSL_PRIMITIVE_MODIFIERS_MASK); + if (modifiers) + { + struct vkd3d_string_buffer *string; +@@ -2553,6 +2399,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + bool constant_buffer = false; + struct hlsl_ir_var *var; + struct hlsl_type *type; ++ bool stream_output; + char *var_name; + unsigned int i; + +@@ -2644,6 +2491,10 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); + } + ++ stream_output = !!hlsl_get_stream_output_type(type); ++ if (stream_output) ++ check_invalid_stream_output_object(ctx, type, v->name, &v->loc); ++ + if (!(var_name = vkd3d_strdup(v->name))) + return; + +@@ -2698,6 +2549,10 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + ++ if (stream_output) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT, ++ "Stream output object '%s' is not allowed in the global scope.", var->name); ++ + if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) + && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { +@@ -2828,15 +2683,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + } + + if (!v->initializer.braces) +- { +- if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) +- { +- free_parse_variable_def(v); +- continue; +- } +- +- v->initializer.args[0] = node_from_block(v->initializer.instrs); +- } ++ v->initializer.args[0] = add_implicit_conversion(ctx, ++ v->initializer.instrs, v->initializer.args[0], type, &v->loc); + + if (var->data_type->class != HLSL_CLASS_ERROR) + initialize_var(ctx, var, &v->initializer, is_default_values_initializer); +@@ -2859,7 +2707,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -13460,7 +14823,7 @@ index 7afc9274c2e..71802fce388 100644 /* Initialize statics to zero by default. */ -@@ -2869,12 +2754,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2869,25 +2717,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var continue; } @@ -13470,25 +14833,26 @@ index 7afc9274c2e..71802fce388 100644 - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, zero); -+ zero = hlsl_block_add_uint_constant(ctx, &ctx->static_initializers, 0, &var->loc); - - if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) - { -@@ -2882,12 +2762,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - continue; - } - +- +- if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) +- { +- free_parse_variable_def(v); +- continue; +- } +- - if (!(store = hlsl_new_simple_store(ctx, var, cast))) - { - free_parse_variable_def(v); - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, store); ++ zero = hlsl_block_add_uint_constant(ctx, &ctx->static_initializers, 0, &var->loc); ++ cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc); + hlsl_block_add_simple_store(ctx, &ctx->static_initializers, var, cast); } free_parse_variable_def(v); } -@@ -2934,6 +2809,7 @@ static enum hlsl_base_type hlsl_base_type_class(enum hlsl_base_type t) +@@ -2934,6 +2766,7 @@ static enum hlsl_base_type hlsl_base_type_class(enum hlsl_base_type t) return HLSL_TYPE_FLOAT; case HLSL_TYPE_INT: @@ -13496,7 +14860,7 @@ index 7afc9274c2e..71802fce388 100644 case HLSL_TYPE_UINT: return HLSL_TYPE_INT; -@@ -2949,6 +2825,7 @@ static unsigned int hlsl_base_type_width(enum hlsl_base_type t) +@@ -2949,6 +2782,7 @@ static unsigned int hlsl_base_type_width(enum hlsl_base_type t) switch (t) { case HLSL_TYPE_HALF: @@ -13504,7 +14868,7 @@ index 7afc9274c2e..71802fce388 100644 return 16; case HLSL_TYPE_FLOAT: -@@ -3123,11 +3000,12 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, +@@ -3123,11 +2957,12 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, return decl; } @@ -13519,18 +14883,21 @@ index 7afc9274c2e..71802fce388 100644 } static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, -@@ -3154,8 +3032,6 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, +@@ -3154,20 +2989,10 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, if (param->storage_modifiers & HLSL_STORAGE_IN) { - struct hlsl_ir_node *store; - if (!hlsl_types_are_equal(arg->data_type, param->data_type)) - { - struct hlsl_ir_node *cast; -@@ -3165,9 +3041,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, - arg = cast; - } +- { +- struct hlsl_ir_node *cast; +- +- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) +- return NULL; +- arg = cast; +- } ++ arg = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc); - if (!(store = hlsl_new_simple_store(ctx, param, arg))) - return NULL; @@ -13539,7 +14906,7 @@ index 7afc9274c2e..71802fce388 100644 } ++k; -@@ -3192,7 +3066,6 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, +@@ -3192,7 +3017,6 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, struct hlsl_type *type = hlsl_type_get_component_type(ctx, param->data_type, j); struct hlsl_constant_value value; struct hlsl_ir_node *comp; @@ -13547,7 +14914,7 @@ index 7afc9274c2e..71802fce388 100644 if (!param->default_values[j].string) { -@@ -3201,9 +3074,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, +@@ -3201,9 +3025,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, return NULL; hlsl_block_add_instr(args->instrs, comp); @@ -13558,7 +14925,7 @@ index 7afc9274c2e..71802fce388 100644 } } } -@@ -3222,37 +3093,22 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, +@@ -3222,37 +3044,22 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, if (param->storage_modifiers & HLSL_STORAGE_OUT) { @@ -13601,7 +14968,7 @@ index 7afc9274c2e..71802fce388 100644 return call; } -@@ -3262,7 +3118,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +@@ -3262,28 +3069,20 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, { struct hlsl_type *type = arg->data_type; @@ -13610,7 +14977,40 @@ index 7afc9274c2e..71802fce388 100644 return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); -@@ -3354,7 +3210,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, + return add_implicit_conversion(ctx, params->instrs, arg, type, loc); + } + +-static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++static void convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, + struct hlsl_type *type, const struct vkd3d_shader_location *loc) + { + unsigned int i; + + for (i = 0; i < params->args_count; ++i) +- { +- struct hlsl_ir_node *new_arg; +- +- if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) +- return false; +- params->args[i] = new_arg; +- } +- +- return true; ++ params->args[i] = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc); + } + + static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, +@@ -3344,7 +3143,8 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, + if (!(common_type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + +- return convert_args(ctx, params, common_type, loc); ++ convert_args(ctx, params, common_type, loc); ++ return true; + } + + static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, +@@ -3354,10 +3154,11 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; @@ -13618,8 +15018,33 @@ index 7afc9274c2e..71802fce388 100644 + if (hlsl_type_is_integer(type)) type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); - return convert_args(ctx, params, type, loc); -@@ -3438,7 +3294,7 @@ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, +- return convert_args(ctx, params, type, loc); ++ convert_args(ctx, params, type, loc); ++ return true; + } + + static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, +@@ -3370,7 +3171,8 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, + + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + +- return convert_args(ctx, params, type, loc); ++ convert_args(ctx, params, type, loc); ++ return true; + } + + static bool intrinsic_abs(struct hlsl_ctx *ctx, +@@ -3407,8 +3209,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, + + const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + type = arg->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, +@@ -3438,7 +3239,7 @@ static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy); } @@ -13628,7 +15053,7 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *res, *load; -@@ -3446,20 +3302,13 @@ static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_init +@@ -3446,20 +3247,13 @@ static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_init count = hlsl_type_component_count(arg->data_type); @@ -13652,27 +15077,47 @@ index 7afc9274c2e..71802fce388 100644 } static bool intrinsic_all(struct hlsl_ctx *ctx, -@@ -3472,7 +3321,8 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) - return false; +@@ -3469,10 +3263,9 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, + struct hlsl_type *bool_type; + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); +- if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) +- return false; +- - return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); ++ cast = add_cast(ctx, params->instrs, arg, bool_type, loc); + add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); + return true; } static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -3485,7 +3335,8 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer * - if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) - return false; +@@ -3482,10 +3275,9 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer * + struct hlsl_type *bool_type; + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); +- if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) +- return false; +- - return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); ++ cast = add_cast(ctx, params->instrs, arg, bool_type, loc); + add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); + return true; } static bool intrinsic_asin(struct hlsl_ctx *ctx, -@@ -3696,7 +3547,7 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, +@@ -3671,10 +3463,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, + static bool intrinsic_ceil(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg; +- +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ struct hlsl_ir_node *arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_CEIL, arg, loc); + } +@@ -3696,7 +3485,7 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, static bool intrinsic_clip(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -13681,7 +15126,7 @@ index 7afc9274c2e..71802fce388 100644 if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; -@@ -3714,10 +3565,7 @@ static bool intrinsic_clip(struct hlsl_ctx *ctx, +@@ -3714,20 +3503,14 @@ static bool intrinsic_clip(struct hlsl_ctx *ctx, return false; } @@ -13693,7 +15138,28 @@ index 7afc9274c2e..71802fce388 100644 return true; } -@@ -3782,7 +3630,7 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, + static bool intrinsic_cos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg; +- +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ struct hlsl_ir_node *arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); + } +@@ -3748,8 +3531,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, + static const char fn_name_sinh[] = "sinh"; + static const char fn_name_cosh[] = "cosh"; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + type_name = arg->data_type->name; + fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; +@@ -3782,39 +3564,23 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, enum hlsl_base_type base; base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); @@ -13702,7 +15168,25 @@ index 7afc9274c2e..71802fce388 100644 base = HLSL_TYPE_FLOAT; cast_type = hlsl_get_vector_type(ctx, base, 3); -@@ -3804,9 +3652,7 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, + +- if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) +- return false; +- +- if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) +- return false; +- +- if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, arg1_swzl1); +- +- if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, arg2_swzl1); ++ arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc); ++ arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc); ++ arg1_swzl1 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc); ++ arg2_swzl1 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc); + if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false; @@ -13711,10 +15195,75 @@ index 7afc9274c2e..71802fce388 100644 - hlsl_block_add_instr(params->instrs, mul1_neg); + mul1_neg = hlsl_block_add_unary_expr(ctx, params->instrs, HLSL_OP1_NEG, mul1, loc); - if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) - return false; -@@ -3886,10 +3732,7 @@ static bool intrinsic_degrees(struct hlsl_ctx *ctx, +- if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, arg1_swzl2); +- +- if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, arg2_swzl2); ++ arg1_swzl2 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc); ++ arg2_swzl2 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc); + + if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false; +@@ -3827,8 +3593,7 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); + } +@@ -3838,8 +3603,7 @@ static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); + } +@@ -3849,8 +3613,7 @@ static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); + } +@@ -3860,8 +3623,7 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); + } +@@ -3871,8 +3633,7 @@ static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); + } +@@ -3882,14 +3643,10 @@ static bool intrinsic_degrees(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg, *deg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); /* 1 rad = 180/pi degree = 57.2957795 degree */ - if (!(deg = hlsl_new_float_constant(ctx, 57.2957795f, loc))) @@ -13725,8 +15274,47 @@ index 7afc9274c2e..71802fce388 100644 return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, deg, loc); } -@@ -4073,9 +3916,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, +@@ -3898,8 +3655,7 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); + } +@@ -3953,8 +3709,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, return false; + } + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, arg, loc); + + dim = min(type->e.numeric.dimx, type->e.numeric.dimy); + if (dim == 1) +@@ -3996,11 +3751,8 @@ static bool intrinsic_distance(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg1, *arg2, *neg, *add, *dot; + +- if (!(arg1 = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; +- +- if (!(arg2 = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) +- return false; ++ arg1 = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); ++ arg2 = intrinsic_float_convert_arg(ctx, params, params->args[1], loc); + + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, arg2, loc))) + return false; +@@ -4069,13 +3821,10 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg, *mul, *coeff; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); /* 1/ln(2) */ - if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) @@ -13736,7 +15324,61 @@ index 7afc9274c2e..71802fce388 100644 if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, arg, loc))) return false; -@@ -4314,21 +4155,6 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, +@@ -4088,8 +3837,7 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); + } +@@ -4157,8 +3905,7 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); + } +@@ -4170,11 +3917,8 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + static const struct hlsl_constant_value zero_value; + +- if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; +- +- if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) +- return false; ++ x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); ++ y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc); + + if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) + return false; +@@ -4209,8 +3953,7 @@ static bool intrinsic_frac(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, arg, loc); + } +@@ -4285,8 +4028,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, + hlsl_release_string_buffer(ctx, string); + } + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) + return false; +@@ -4314,21 +4056,6 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, params->args[0], mul, loc); } @@ -13758,7 +15400,15 @@ index 7afc9274c2e..71802fce388 100644 static bool intrinsic_lit(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -4370,10 +4196,7 @@ static bool intrinsic_log(struct hlsl_ctx *ctx, +@@ -4363,17 +4090,13 @@ static bool intrinsic_log(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *log, *arg, *coeff; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) return false; /* ln(2) */ @@ -13770,7 +15420,15 @@ index 7afc9274c2e..71802fce388 100644 return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); } -@@ -4389,10 +4212,7 @@ static bool intrinsic_log10(struct hlsl_ctx *ctx, +@@ -4382,17 +4105,13 @@ static bool intrinsic_log10(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *log, *arg, *coeff; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) return false; /* 1 / log2(10) */ @@ -13782,7 +15440,17 @@ index 7afc9274c2e..71802fce388 100644 return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); } -@@ -4476,7 +4296,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4401,8 +4120,7 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); + } +@@ -4476,7 +4194,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, struct hlsl_type *cast_type1 = arg1->data_type, *cast_type2 = arg2->data_type, *matrix_type, *ret_type; unsigned int i, j, k, vect_count = 0; struct hlsl_deref var_deref; @@ -13791,7 +15459,21 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_ir_var *var; if (arg1->data_type->class == HLSL_CLASS_SCALAR || arg2->data_type->class == HLSL_CLASS_SCALAR) -@@ -4525,19 +4345,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4510,11 +4228,8 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + ret_type = hlsl_get_scalar_type(ctx, base); + } + +- if (!(cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc))) +- return false; +- +- if (!(cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc))) +- return false; ++ cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc); ++ cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc); + + if (!(var = hlsl_new_synthetic_var(ctx, "mul", matrix_type, loc))) + return false; +@@ -4525,19 +4240,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, for (j = 0; j < matrix_type->e.numeric.dimy; ++j) { struct hlsl_ir_node *instr = NULL; @@ -13815,7 +15497,7 @@ index 7afc9274c2e..71802fce388 100644 if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) return false; -@@ -4553,17 +4369,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -4553,17 +4264,14 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } } @@ -13833,11 +15515,22 @@ index 7afc9274c2e..71802fce388 100644 - - return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); + load = hlsl_block_add_simple_load(ctx, params->instrs, var, loc); -+ return !!add_implicit_conversion(ctx, params->instrs, load, ret_type, loc); ++ add_implicit_conversion(ctx, params->instrs, load, ret_type, loc); ++ return true; } static bool intrinsic_normalize(struct hlsl_ctx *ctx, -@@ -4597,10 +4409,18 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, +@@ -4582,8 +4290,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, + hlsl_release_string_buffer(ctx, string); + } + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) + return false; +@@ -4597,10 +4304,18 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, static bool intrinsic_pow(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -13857,8 +15550,13 @@ index 7afc9274c2e..71802fce388 100644 } static bool intrinsic_radians(struct hlsl_ctx *ctx, -@@ -4612,10 +4432,7 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, - return false; +@@ -4608,14 +4323,10 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg, *rad; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); /* 1 degree = pi/180 rad = 0.0174532925f rad */ - if (!(rad = hlsl_new_float_constant(ctx, 0.0174532925f, loc))) @@ -13869,7 +15567,235 @@ index 7afc9274c2e..71802fce388 100644 return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, rad, loc); } -@@ -5052,8 +4869,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -4624,8 +4335,7 @@ static bool intrinsic_rcp(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); + } +@@ -4656,7 +4366,6 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, + { + struct hlsl_type *type, *scalar_type; + struct hlsl_ir_function_decl *func; +- struct hlsl_ir_node *index; + char *body; + + static const char template[] = +@@ -4686,9 +4395,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, + * which we will only use the first component of. */ + + scalar_type = hlsl_get_scalar_type(ctx, params->args[2]->data_type->e.numeric.type); +- if (!(index = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc))) +- return false; +- params->args[2] = index; ++ params->args[2] = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc); + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; +@@ -4711,8 +4418,7 @@ static bool intrinsic_round(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ROUND, arg, loc); + } +@@ -4722,8 +4428,7 @@ static bool intrinsic_rsqrt(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RSQ, arg, loc); + } +@@ -4733,8 +4438,7 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); + } +@@ -4757,16 +4461,14 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) + return false; + +- if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) +- return false; ++ op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc); + + /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) + return false; + +- if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) +- return false; ++ op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc); + + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) + return false; +@@ -4780,8 +4482,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); + } +@@ -4855,8 +4556,7 @@ static bool intrinsic_sqrt(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, arg, loc); + } +@@ -4875,7 +4575,8 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, + params->args[1], params->args[0], loc))) + return false; + +- return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); ++ add_implicit_conversion(ctx, params->instrs, ge, type, loc); ++ return true; + } + + static bool intrinsic_tan(struct hlsl_ctx *ctx, +@@ -4909,8 +4610,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, + " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" + "}\n"; + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + type = arg->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, +@@ -4931,7 +4631,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + unsigned int sampler_dim = hlsl_sampler_dim_count(dim); + struct hlsl_resource_load_params load_params = { 0 }; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *coords, *sample; ++ struct hlsl_ir_node *coords; + + if (params->args_count != 2 && params->args_count != 4) + { +@@ -4963,47 +4663,27 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + else + load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; + +- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) +- return false; +- hlsl_block_add_instr(params->instrs, c); +- +- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- { +- return false; +- } +- +- if (!(lod = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), 1, params->args[1], loc))) +- return false; +- hlsl_block_add_instr(params->instrs, lod); ++ c = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc); ++ coords = add_implicit_conversion(ctx, params->instrs, c, ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); + +- if (!(load_params.lod = add_implicit_conversion(ctx, params->instrs, lod, +- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) +- { +- return false; +- } ++ lod = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(W, W, W, W), 1, params->args[1], loc); ++ load_params.lod = add_implicit_conversion(ctx, params->instrs, lod, ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); + } + else if (!strcmp(name, "tex2Dproj") + || !strcmp(name, "tex3Dproj") + || !strcmp(name, "texCUBEproj")) + { +- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), loc))) +- { +- return false; +- } ++ coords = add_implicit_conversion(ctx, params->instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), loc); + + if (hlsl_version_ge(ctx, 4, 0)) + { + struct hlsl_ir_node *divisor; + +- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, divisor); +- +- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, coords); ++ divisor = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc); ++ coords = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc); + + if (!(coords = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, coords, divisor, loc))) + return false; +@@ -5017,43 +4697,25 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + } + else if (params->args_count == 4) /* Gradient sampling. */ + { +- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- { +- return false; +- } +- +- if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- { +- return false; +- } +- +- if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- { +- return false; +- } +- ++ coords = add_implicit_conversion(ctx, params->instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); ++ load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); ++ load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); + load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; + } + else + { + load_params.type = HLSL_RESOURCE_SAMPLE; +- +- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- { +- return false; +- } ++ coords = add_implicit_conversion(ctx, params->instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); + } + + /* tex1D() functions never produce 1D resource declarations. For newer profiles half offset is used for the second coordinate, while older ones appear to replicate first coordinate.*/ if (dim == HLSL_SAMPLER_DIM_1D) { @@ -13878,7 +15804,7 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_ir_var *var; unsigned int idx = 0; -@@ -5062,22 +4877,10 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -5062,22 +4724,10 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * initialize_var_components(ctx, params->instrs, var, &idx, coords, false); if (hlsl_version_ge(ctx, 4, 0)) @@ -13904,7 +15830,18 @@ index 7afc9274c2e..71802fce388 100644 dim = HLSL_SAMPLER_DIM_2D; } -@@ -5175,7 +4978,6 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -5086,9 +4736,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim; + +- if (!(sample = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, sample); ++ hlsl_block_add_resource_load(ctx, params->instrs, &load_params, loc); + return true; + } + +@@ -5175,7 +4823,6 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_ir_node *arg = params->args[0]; struct hlsl_type *arg_type = arg->data_type; @@ -13912,7 +15849,7 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_deref var_deref; struct hlsl_type *mat_type; struct hlsl_ir_node *load; -@@ -5210,22 +5012,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, +@@ -5210,32 +4857,21 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { for (j = 0; j < arg_type->e.numeric.dimy; ++j) { @@ -13940,18 +15877,52 @@ index 7afc9274c2e..71802fce388 100644 return true; } -@@ -5263,9 +5057,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) - return false; + static bool intrinsic_trunc(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg; +- +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; ++ struct hlsl_ir_node *arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); + } +@@ -5243,7 +4879,7 @@ static bool intrinsic_trunc(struct hlsl_ctx *ctx, + static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; ++ struct hlsl_ir_node *arg = params->args[0], *ret, *c; + struct hlsl_type *arg_type = arg->data_type; + + if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR +@@ -5260,21 +4896,11 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + return false; + } + +- if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) +- return false; +- - if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) - return false; - hlsl_block_add_instr(params->instrs, c); ++ arg = intrinsic_float_convert_arg(ctx, params, arg, loc); + c = hlsl_block_add_float_constant(ctx, params->instrs, 255.0f + (0.5f / 256.0f), loc); if (arg_type->class == HLSL_CLASS_VECTOR) - { -@@ -5289,25 +5081,20 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, +- { +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, swizzle); +- +- arg = swizzle; +- } ++ arg = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc); + + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; +@@ -5289,25 +4915,20 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; @@ -13980,7 +15951,7 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_type *lhs_type, *val_type; struct vkd3d_string_buffer *string; struct hlsl_deref dst_deref; -@@ -5421,10 +5208,7 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op +@@ -5421,10 +5042,7 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op return false; } @@ -13992,7 +15963,7 @@ index 7afc9274c2e..71802fce388 100644 return true; } -@@ -5759,7 +5543,6 @@ static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_co +@@ -5759,7 +5377,6 @@ static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_co static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -14000,7 +15971,7 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_ir_var *var; if (!hlsl_is_numeric_type(type)) -@@ -5778,9 +5561,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type +@@ -5778,9 +5395,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type initialize_var(ctx, var, params, false); @@ -14011,7 +15982,277 @@ index 7afc9274c2e..71802fce388 100644 vkd3d_free(params->args); return params->instrs; -@@ -6361,9 +6142,7 @@ static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -5822,8 +5437,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + { + cond_type = hlsl_get_numeric_type(ctx, common_type->class, + HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy); +- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) +- return false; ++ cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); + } + else + { +@@ -5852,15 +5466,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + + cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, + common_type->e.numeric.dimx, common_type->e.numeric.dimy); +- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) +- return false; ++ cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); + } + +- if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) +- return false; +- +- if (!(second = add_implicit_conversion(ctx, block, second, common_type, &second->loc))) +- return false; ++ first = add_implicit_conversion(ctx, block, first, common_type, &first->loc); ++ second = add_implicit_conversion(ctx, block, second, common_type, &second->loc); + } + else + { +@@ -5880,9 +5490,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, + cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); +- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) +- return false; +- ++ cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); + common_type = first->data_type; + } + +@@ -5935,7 +5543,6 @@ static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bl + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; +- struct hlsl_ir_node *load; + unsigned int value_dim; + + if (params->args_count != 1 && params->args_count != 2) +@@ -5967,16 +5574,11 @@ static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bl + else + value_dim = 4; + +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) +- return false; +- ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[0], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim); + load_params.resource = object; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -5986,7 +5588,6 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; + unsigned int sampler_dim, offset_dim; +- struct hlsl_ir_node *load; + bool multisampled; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) +@@ -6013,18 +5614,12 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, + } + + if (multisampled) +- { +- if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) +- return false; +- } ++ load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); + + if (!!offset_dim && params->args_count > 1 + multisampled) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } ++ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); + + if (params->args_count > 1 + multisampled + !!offset_dim) + { +@@ -6032,16 +5627,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, + } + + /* +1 for the mipmap level for non-multisampled textures */ +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) +- return false; +- ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[0], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc); + load_params.format = object_type->e.resource.format; + load_params.resource = object; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -6052,7 +5642,6 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *load; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +@@ -6077,16 +5666,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- return false; ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); + + if (offset_dim && params->args_count > 2) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } ++ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); + + if (params->args_count > 2 + !!offset_dim) + hlsl_fixme(ctx, loc, "Sample() clamp parameter."); +@@ -6096,11 +5681,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + load_params.format = object_type->e.resource.format; + load_params.resource = object; + load_params.sampler = params->args[0]; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); +- ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -6111,7 +5692,6 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + struct hlsl_resource_load_params load_params = { 0 }; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *load; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +@@ -6142,20 +5722,14 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- return false; +- +- if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) +- load_params.cmp = params->args[2]; ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); ++ load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); + + if (offset_dim && params->args_count > 3) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } ++ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); + + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "%s() clamp parameter.", name); +@@ -6165,11 +5739,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + load_params.format = object_type->e.resource.format; + load_params.resource = object; + load_params.sampler = params->args[0]; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); +- ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -6180,7 +5750,6 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + struct hlsl_resource_load_params load_params = {0}; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *load; + unsigned int read_channel; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); +@@ -6234,9 +5803,8 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + } + else if (offset_dim && params->args_count > 2) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; ++ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); + } + + sampler_type = params->args[0]->data_type; +@@ -6258,17 +5826,12 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- return false; +- ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); + load_params.resource = object; + load_params.sampler = params->args[0]; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -6279,7 +5842,6 @@ static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + struct hlsl_resource_load_params load_params = {0}; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *load; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +@@ -6346,10 +5908,7 @@ static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); + load_params.resource = object; + load_params.sampler = params->args[0]; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -6361,9 +5920,7 @@ static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_bloc if (!dest) return true; @@ -14022,7 +16263,7 @@ index 7afc9274c2e..71802fce388 100644 if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load, false)) return false; -@@ -6377,7 +6156,6 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -6377,7 +5934,6 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc bool uint_resinfo, has_uint_arg, has_float_arg; struct hlsl_resource_load_params load_params; struct hlsl_ir_node *sample_info, *res_info; @@ -14030,7 +16271,22 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_type *uint_type, *float_type; unsigned int i, j; enum func_argument -@@ -6478,12 +6256,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -6441,12 +5997,8 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc + /* Input parameter. */ + if (iter->args[j] == ARG_MIP_LEVEL) + { +- if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) +- { +- return false; +- } +- ++ args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + continue; + } + +@@ -6478,22 +6030,14 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc } if (!args[ARG_MIP_LEVEL]) @@ -14044,7 +16300,27 @@ index 7afc9274c2e..71802fce388 100644 memset(&load_params, 0, sizeof(load_params)); load_params.type = HLSL_RESOURCE_RESINFO; -@@ -6524,10 +6297,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc + load_params.resource = object; + load_params.lod = args[ARG_MIP_LEVEL]; + load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); +- +- if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, res_info); ++ res_info = hlsl_block_add_resource_load(ctx, block, &load_params, loc); + + if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) + return false; +@@ -6516,18 +6060,13 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc + load_params.type = HLSL_RESOURCE_SAMPLE_INFO; + load_params.resource = object; + load_params.format = args[ARG_SAMPLE_COUNT]->data_type; +- if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, sample_info); ++ sample_info = hlsl_block_add_resource_load(ctx, block, &load_params, loc); + + if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info, false)) return false; } @@ -14056,7 +16332,106 @@ index 7afc9274c2e..71802fce388 100644 return true; } -@@ -6665,7 +6435,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block +@@ -6538,7 +6077,6 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + struct hlsl_resource_load_params load_params = { 0 }; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *load; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +@@ -6568,20 +6106,14 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- load_params.coords = params->args[1]; +- +- if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) +- load_params.lod = params->args[2]; ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); ++ load_params.lod = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); + + if (offset_dim && params->args_count > 3) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } ++ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); + + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); +@@ -6589,10 +6121,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + load_params.format = object_type->e.resource.format; + load_params.resource = object; + load_params.sampler = params->args[0]; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + +@@ -6603,7 +6132,6 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block + struct hlsl_resource_load_params load_params = { 0 }; + unsigned int sampler_dim, offset_dim; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_node *load; + + sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +@@ -6630,24 +6158,16 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- load_params.coords = params->args[1]; +- +- if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- load_params.ddx = params->args[2]; +- +- if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- load_params.ddy = params->args[3]; ++ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); ++ load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); ++ load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); + + if (offset_dim && params->args_count > 4) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } ++ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); + + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); +@@ -6655,17 +6175,14 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block + load_params.format = object_type->e.resource.format; + load_params.resource = object; + load_params.sampler = params->args[0]; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- hlsl_block_add_instr(block, load); ++ hlsl_block_add_resource_load(ctx, block, &load_params, loc); + return true; + } + static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -14065,7 +16440,22 @@ index 7afc9274c2e..71802fce388 100644 struct hlsl_deref resource_deref; unsigned int value_dim; -@@ -6696,13 +6466,7 @@ static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block +@@ -6685,24 +6202,15 @@ static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block + else + value_dim = 4; + +- if (!(offset = add_implicit_conversion(ctx, block, params->args[0], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) +- return false; +- +- if (!(rhs = add_implicit_conversion(ctx, block, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc))) +- return false; ++ offset = add_implicit_conversion(ctx, block, params->args[0], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); ++ rhs = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc); + if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object)) return false; @@ -14080,7 +16470,84 @@ index 7afc9274c2e..71802fce388 100644 hlsl_cleanup_deref(&resource_deref); return true; -@@ -8752,6 +8516,26 @@ state_block: +@@ -6903,15 +6411,8 @@ static bool add_switch(struct hlsl_ctx *ctx, struct hlsl_block *block, + return true; + } + +- if (!(selector = add_implicit_conversion(ctx, block, selector, +- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &selector->loc))) +- { +- destroy_switch_cases(cases); +- destroy_block(block); +- cleanup_parse_attribute_list(attributes); +- return false; +- } +- ++ selector = add_implicit_conversion(ctx, block, selector, ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &selector->loc); + s = hlsl_new_switch(ctx, selector, cases, loc); + + destroy_switch_cases(cases); +@@ -7052,6 +6553,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_INLINE + %token KW_INOUT + %token KW_INPUTPATCH ++%token KW_LINE ++%token KW_LINEADJ + %token KW_LINEAR + %token KW_LINESTREAM + %token KW_MATRIX +@@ -7064,6 +6567,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER ++%token KW_POINT + %token KW_POINTSTREAM + %token KW_RASTERIZERORDEREDBUFFER + %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER +@@ -7114,6 +6618,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_TEXTURE3D + %token KW_TEXTURECUBE + %token KW_TEXTURECUBEARRAY ++%token KW_TRIANGLE ++%token KW_TRIANGLEADJ + %token KW_TRIANGLESTREAM + %token KW_TRUE + %token KW_TYPEDEF +@@ -8121,7 +7627,8 @@ parameter: + parameter_decl: + var_modifiers type_no_void any_identifier arrays colon_attributes + { +- uint32_t modifiers = $1; ++ uint32_t prim_modifiers = $1 & HLSL_PRIMITIVE_MODIFIERS_MASK; ++ uint32_t modifiers = $1 & ~HLSL_PRIMITIVE_MODIFIERS_MASK; + struct hlsl_type *type; + unsigned int i; + +@@ -8146,6 +7653,22 @@ parameter_decl: + } + vkd3d_free($4.sizes); + ++ if (prim_modifiers && (prim_modifiers & (prim_modifiers - 1))) ++ { ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Primitive type modifiers are mutually exclusive."); ++ prim_modifiers = 0; ++ } ++ ++ if (prim_modifiers) ++ { ++ if (type->class != HLSL_CLASS_ARRAY) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Primitive type modifiers can only be applied to arrays."); ++ else ++ type->modifiers |= prim_modifiers; ++ } ++ + $$.type = type; + + if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) +@@ -8752,6 +8275,26 @@ state_block: hlsl_src_from_node(&entry->args[i], $5.args[i]); vkd3d_free($5.args); @@ -14107,7 +16574,52 @@ index 7afc9274c2e..71802fce388 100644 $$ = $1; hlsl_state_block_add_entry($$, entry); } -@@ -9130,8 +8914,6 @@ statement: +@@ -8845,7 +8388,7 @@ variable_def_typed: + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + +- check_invalid_in_out_modifiers(ctx, modifiers, &@1); ++ check_invalid_non_parameter_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; +@@ -8860,7 +8403,7 @@ variable_def_typed: + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + +- check_invalid_in_out_modifiers(ctx, modifiers, &@1); ++ check_invalid_non_parameter_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; +@@ -9001,6 +8544,26 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, &@1); + } ++ | KW_LINE var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_LINE, &@1); ++ } ++ | KW_LINEADJ var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_LINEADJ, &@1); ++ } ++ | KW_POINT var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_POINT, &@1); ++ } ++ | KW_TRIANGLE var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_TRIANGLE, &@1); ++ } ++ | KW_TRIANGLEADJ var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_TRIANGLEADJ, &@1); ++ } + | var_identifier var_modifiers + { + $$ = $2; +@@ -9130,8 +8693,6 @@ statement: jump_statement: KW_BREAK ';' { @@ -14116,7 +16628,7 @@ index 7afc9274c2e..71802fce388 100644 if (!is_break_allowed(ctx->cur_scope)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -@@ -9140,22 +8922,15 @@ jump_statement: +@@ -9140,22 +8701,15 @@ jump_statement: if (!($$ = make_empty_block(ctx))) YYABORT; @@ -14141,7 +16653,7 @@ index 7afc9274c2e..71802fce388 100644 } | KW_RETURN expr ';' { -@@ -9172,18 +8947,12 @@ jump_statement: +@@ -9172,18 +8726,12 @@ jump_statement: } | KW_DISCARD ';' { @@ -14163,7 +16675,7 @@ index 7afc9274c2e..71802fce388 100644 } selection_statement: -@@ -9191,7 +8960,6 @@ selection_statement: +@@ -9191,7 +8739,6 @@ selection_statement: { struct hlsl_ir_node *condition = node_from_block($4); const struct parse_attribute_list *attributes = &$1; @@ -14171,9 +16683,19 @@ index 7afc9274c2e..71802fce388 100644 unsigned int i; check_attribute_list_for_duplicates(ctx, attributes); -@@ -9221,19 +8989,13 @@ selection_statement: - YYABORT; - } +@@ -9213,27 +8760,14 @@ selection_statement: + + check_condition_type(ctx, condition); + +- if (!(condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4))) +- { +- destroy_block($6.then_block); +- destroy_block($6.else_block); +- cleanup_parse_attribute_list(&$1); +- YYABORT; +- } ++ condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4); ++ hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, &@2); - if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) - { @@ -14182,8 +16704,6 @@ index 7afc9274c2e..71802fce388 100644 - cleanup_parse_attribute_list(&$1); - YYABORT; - } -+ hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, &@2); -+ destroy_block($6.then_block); destroy_block($6.else_block); cleanup_parse_attribute_list(&$1); @@ -14193,7 +16713,7 @@ index 7afc9274c2e..71802fce388 100644 } if_body: -@@ -9383,30 +9145,21 @@ func_arguments: +@@ -9383,30 +8917,21 @@ func_arguments: primary_expr: C_FLOAT { @@ -14230,7 +16750,7 @@ index 7afc9274c2e..71802fce388 100644 } | boolean { -@@ -9451,17 +9204,15 @@ primary_expr: +@@ -9451,17 +8976,15 @@ primary_expr: } | VAR_IDENTIFIER { @@ -14250,7 +16770,7 @@ index 7afc9274c2e..71802fce388 100644 } else { -@@ -9583,12 +9334,7 @@ postfix_expr: +@@ -9583,12 +9106,7 @@ postfix_expr: if (node->data_type->class == HLSL_CLASS_STRUCT) { @@ -14264,8 +16784,22 @@ index 7afc9274c2e..71802fce388 100644 } else if (hlsl_is_numeric_type(node->data_type)) { +@@ -9703,12 +9221,7 @@ unary_expr: + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on casts."); + +- if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) +- { +- destroy_block($6); +- vkd3d_free($4.sizes); +- YYABORT; +- } ++ add_explicit_conversion(ctx, $6, $3, &$4, &@3); + vkd3d_free($4.sizes); + $$ = $6; + } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 2afd3e1e1e5..edcd9ce62a7 100644 +index 2afd3e1e1e5..ba56ba90403 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -62,14 +62,9 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str @@ -14469,7 +17003,51 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1, -@@ -416,7 +413,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec +@@ -274,9 +271,9 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls + if (ctx->profile->major_version < 4) + return true; + +- if (hlsl_type_is_patch_array(type1)) ++ if (hlsl_type_is_primitive_array(type1)) + { +- return hlsl_type_is_patch_array(type2) ++ return hlsl_type_is_primitive_array(type2) + && type1->e.array.array_type == type2->e.array.array_type + && type1->e.array.elements_count == type2->e.array.elements_count + && types_are_semantic_equivalent(ctx, type1->e.array.type, type2->e.array.type); +@@ -298,8 +295,8 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + const char *prefix; + char *new_name; + +- if (hlsl_type_is_patch_array(type)) +- prefix = type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT ? "inputpatch" : "outputpatch"; ++ if (hlsl_type_is_primitive_array(type)) ++ prefix = type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT ? "outputpatch" : "inputprim"; + else + prefix = output ? "output" : "input"; + +@@ -310,9 +307,9 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + { + if (!ascii_strcasecmp(ext_var->name, new_name)) + { +- VKD3D_ASSERT(hlsl_type_is_patch_array(ext_var->data_type) ++ VKD3D_ASSERT(hlsl_type_is_primitive_array(ext_var->data_type) + || ext_var->data_type->class <= HLSL_CLASS_VECTOR); +- VKD3D_ASSERT(hlsl_type_is_patch_array(type) || type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(hlsl_type_is_primitive_array(type) || type->class <= HLSL_CLASS_VECTOR); + + if (output) + { +@@ -386,7 +383,7 @@ static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t fie + } + + static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +- struct hlsl_block *block, struct hlsl_ir_var *top_var, uint32_t patch_index, struct hlsl_ir_load *lhs, ++ struct hlsl_block *block, uint32_t prim_index, struct hlsl_ir_load *lhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) + { + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; +@@ -416,31 +413,29 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -14478,18 +17056,41 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_var *input; struct hlsl_ir_load *load; -@@ -436,9 +433,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec +- if (hlsl_type_is_patch_array(top_var->data_type)) ++ if (hlsl_type_is_primitive_array(var->data_type)) + { +- struct hlsl_type *top_type = top_var->data_type; +- struct hlsl_type *patch_type; +- struct hlsl_deref patch_deref; ++ struct hlsl_type *prim_type_src; ++ struct hlsl_deref prim_deref; + struct hlsl_ir_node *idx; + +- if (!(patch_type = hlsl_new_array_type(ctx, vector_type_src, top_type->e.array.elements_count, +- top_type->e.array.array_type))) ++ if (!(prim_type_src = hlsl_new_array_type(ctx, vector_type_src, var->data_type->e.array.elements_count, ++ var->data_type->e.array.array_type))) return; - hlsl_init_simple_deref_from_var(&patch_deref, input); ++ prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK; + +- if (!(input = add_semantic_var(ctx, func, var, patch_type, ++ if (!(input = add_semantic_var(ctx, func, var, prim_type_src, + modifiers, semantic, semantic_index + i, false, force_align, loc))) + return; +- hlsl_init_simple_deref_from_var(&patch_deref, input); ++ hlsl_init_simple_deref_from_var(&prim_deref, input); - if (!(idx = hlsl_new_uint_constant(ctx, patch_index, &var->loc))) - return; - hlsl_block_add_instr(block, idx); -+ idx = hlsl_block_add_uint_constant(ctx, block, patch_index, &var->loc); ++ idx = hlsl_block_add_uint_constant(ctx, block, prim_index, &var->loc); - if (!(load = hlsl_new_load_index(ctx, &patch_deref, idx, loc))) +- if (!(load = hlsl_new_load_index(ctx, &patch_deref, idx, loc))) ++ if (!(load = hlsl_new_load_index(ctx, &prim_deref, idx, loc))) return; -@@ -455,27 +450,19 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec + hlsl_block_add_instr(block, &load->node); + } +@@ -455,33 +450,25 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec hlsl_block_add_instr(block, &load->node); } @@ -14521,7 +17122,25 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } } } -@@ -526,9 +513,7 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + + static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +- struct hlsl_block *block, struct hlsl_ir_var *top_var, uint32_t patch_index, struct hlsl_ir_load *lhs, ++ struct hlsl_block *block, uint32_t prim_index, struct hlsl_ir_load *lhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) + { + struct vkd3d_shader_location *loc = &lhs->node.loc; +@@ -507,8 +494,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + element_modifiers = modifiers; + force_align = true; + +- if (hlsl_type_is_patch_array(type)) +- patch_index = i; ++ if (hlsl_type_is_primitive_array(type)) ++ prim_index = i; + } + else + { +@@ -526,22 +513,20 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func force_align = (i == 0); } @@ -14532,6 +17151,32 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 /* This redundant load is expected to be deleted later by DCE. */ if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) + return; + hlsl_block_add_instr(block, &element_load->node); + +- prepend_input_copy_recurse(ctx, func, block, top_var, patch_index, element_load, ++ prepend_input_copy_recurse(ctx, func, block, prim_index, element_load, + element_modifiers, semantic, elem_semantic_index, force_align); + } + } + else + { +- prepend_input_copy(ctx, func, block, var, patch_index, lhs, modifiers, semantic, semantic_index, force_align); ++ prepend_input_copy(ctx, func, block, prim_index, lhs, modifiers, semantic, semantic_index, force_align); + } + } + +@@ -559,8 +544,8 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function + return; + hlsl_block_add_instr(&block, &load->node); + +- prepend_input_copy_recurse(ctx, func, &block, var, 0, load, +- var->storage_modifiers, &var->semantic, var->semantic.index, false); ++ prepend_input_copy_recurse(ctx, func, &block, 0, load, var->storage_modifiers, ++ &var->semantic, var->semantic.index, false); + + list_move_head(&func->body.instrs, &block.instrs); + } @@ -593,9 +578,8 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec for (i = 0; i < hlsl_type_major_size(type); ++i) @@ -14677,13 +17322,13 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) - return NULL; - hlsl_block_add_instr(block, store); -+ zero = hlsl_block_add_uint_constant(ctx, block, 0, loc); -+ hlsl_block_add_store_index(ctx, block, &coords_deref, NULL, zero, 1u << dim_count, loc); - +- - if (!(coords_load = hlsl_new_var_load(ctx, coords, loc))) - return NULL; - hlsl_block_add_instr(block, &coords_load->node); -- ++ zero = hlsl_block_add_uint_constant(ctx, block, 0, loc); ++ hlsl_block_add_store_index(ctx, block, &coords_deref, NULL, zero, 1u << dim_count, loc); + - return &coords_load->node; + return hlsl_block_add_simple_load(ctx, block, coords, loc); } @@ -14780,7 +17425,27 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_var *var; if (instr->type != HLSL_IR_INDEX) -@@ -1306,9 +1238,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1282,7 +1214,6 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim); + struct hlsl_ir_node *coords = index->idx.node; + struct hlsl_resource_load_params params = {0}; +- struct hlsl_ir_node *resource_load; + + VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); +@@ -1295,10 +1226,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + params.resource = val; + params.coords = coords; + params.format = val->data_type->e.resource.format; +- +- if (!(resource_load = hlsl_new_resource_load(ctx, ¶ms, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, resource_load); ++ hlsl_block_add_resource_load(ctx, block, ¶ms, &instr->loc); + return true; + } + +@@ -1306,9 +1234,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -14791,7 +17456,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (hlsl_index_is_noncontiguous(index)) { -@@ -1326,9 +1256,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1326,9 +1252,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { struct hlsl_ir_node *c; @@ -14802,7 +17467,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) return false; -@@ -1338,20 +1266,14 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1338,20 +1262,14 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_block_add_instr(block, &load->node); @@ -14826,7 +17491,13 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } return true; } -@@ -1378,9 +1300,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s +@@ -1373,22 +1291,16 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s + + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1) + { +- struct hlsl_ir_node *new_cast, *swizzle; ++ struct hlsl_ir_node *new_cast; + dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->e.numeric.type); /* We need to preserve the cast since it might be doing more than just * turning the scalar into a vector. */ @@ -14836,8 +17507,18 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 + new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_scalar_type, &cast->node.loc); if (dst_type->e.numeric.dimx != 1) - { -@@ -1398,7 +1318,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s +- { +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), +- dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) +- return false; +- hlsl_block_add_instr(block, swizzle); +- } ++ hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), ++ dst_type->e.numeric.dimx, new_cast, &cast->node.loc); + + return true; + } +@@ -1398,7 +1310,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s /* Allocate a unique, ordered index to each instruction, which will be used for * copy propagation and computing liveness ranges. @@ -14846,30 +17527,1517 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) { struct hlsl_ir_node *instr; -@@ -2210,7 +2130,10 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc - struct copy_propagation_state state; - bool progress; +@@ -1480,6 +1392,17 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in + * + * we can copy-prop the load (@7) into a constant vector {123, 456}, but we + * cannot easily vectorize the stores @3 and @6. ++ * ++ * Moreover, we implement a transformation that propagates loads with a single ++ * non-constant index in its deref path. Consider a load of the form ++ * var[[a0][a1]...[i]...[an]], where ak are integral constants, and i is an ++ * arbitrary non-constant node. If, for all j, the following holds: ++ * ++ * var[[a0][a1]...[j]...[an]] = x[[c0*j + d0][c1*j + d1]...[cm*j + dm]], ++ * ++ * where ck, dk are constants, then we can replace the load with ++ * x[[c0*i + d0]...[cm*i + dm]]. This pass is implemented by ++ * copy_propagation_replace_with_deref(). + */ -- index_instructions(block, 2); + struct copy_propagation_value +@@ -1704,16 +1627,25 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ + + if (path_node->type == HLSL_IR_CONSTANT) + { ++ uint32_t index = hlsl_ir_constant(path_node)->value.u[0].u; ++ ++ /* Don't bother invalidating anything if the index is constant but ++ * out-of-range. ++ * Such indices are illegal in HLSL, but only if the code is not ++ * dead, and we can't always know if code is dead without copy-prop ++ * itself. */ ++ if (index >= hlsl_type_element_count(type)) ++ return; ++ + copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, +- depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, +- writemask, time); ++ depth + 1, comp_start + index * subtype_comp_count, writemask, time); + } + else + { + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, +- depth + 1, i * subtype_comp_count, writemask, time); ++ depth + 1, comp_start + i * subtype_comp_count, writemask, time); + } + } + } +@@ -1837,147 +1769,475 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, + return true; + } + +-static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, +- struct hlsl_ir_load *load, struct copy_propagation_state *state) ++static bool component_index_from_deref_path_node(struct hlsl_ir_node *path_node, ++ struct hlsl_type *type, unsigned int *index) + { +- struct hlsl_type *type = load->node.data_type; ++ unsigned int idx, i; ++ ++ if (path_node->type != HLSL_IR_CONSTANT) ++ return false; ++ ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ *index = 0; + + switch (type->class) + { +- case HLSL_CLASS_DEPTH_STENCIL_STATE: +- case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +- case HLSL_CLASS_PIXEL_SHADER: +- case HLSL_CLASS_RASTERIZER_STATE: +- case HLSL_CLASS_SAMPLER: +- case HLSL_CLASS_STRING: +- case HLSL_CLASS_TEXTURE: +- case HLSL_CLASS_UAV: +- case HLSL_CLASS_VERTEX_SHADER: +- case HLSL_CLASS_COMPUTE_SHADER: +- case HLSL_CLASS_DOMAIN_SHADER: +- case HLSL_CLASS_HULL_SHADER: +- case HLSL_CLASS_RENDER_TARGET_VIEW: +- case HLSL_CLASS_DEPTH_STENCIL_VIEW: +- case HLSL_CLASS_GEOMETRY_SHADER: +- case HLSL_CLASS_BLEND_STATE: +- case HLSL_CLASS_STREAM_OUTPUT: +- case HLSL_CLASS_NULL: ++ if (idx >= type->e.numeric.dimx) ++ return false; ++ *index = idx; + break; + + case HLSL_CLASS_MATRIX: ++ if (idx >= hlsl_type_major_size(type)) ++ return false; ++ if (hlsl_type_is_row_major(type)) ++ *index = idx * type->e.numeric.dimx; ++ else ++ *index = idx * type->e.numeric.dimy; ++ break; ++ + case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ return false; ++ *index = idx * hlsl_type_component_count(type->e.array.type); ++ break; ++ + case HLSL_CLASS_STRUCT: +- /* We can't handle complex types here. +- * They should have been already split anyway by earlier passes, +- * but they may not have been deleted yet. We can't rely on DCE to +- * solve that problem for us, since we may be called on a partial +- * block, but DCE deletes dead stores, so it needs to be able to +- * see the whole program. */ +- case HLSL_CLASS_ERROR: +- return false; ++ for (i = 0; i < idx; ++i) ++ *index += hlsl_type_component_count(type->e.record.fields[i].type); ++ break; + +- case HLSL_CLASS_CONSTANT_BUFFER: +- case HLSL_CLASS_EFFECT_GROUP: +- case HLSL_CLASS_PASS: +- case HLSL_CLASS_TECHNIQUE: +- case HLSL_CLASS_VOID: ++ default: + vkd3d_unreachable(); + } + +- if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) +- return true; +- +- if (copy_propagation_replace_with_single_instr(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) +- return true; +- +- return false; ++ return true; + } + +-static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, +- struct hlsl_ir_swizzle *swizzle, struct copy_propagation_state *state) ++static bool nonconst_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, ++ unsigned int *idx, unsigned int *base, unsigned int *scale, unsigned int *count) + { +- struct hlsl_ir_load *load; +- +- if (swizzle->val.node->type != HLSL_IR_LOAD) +- return false; +- load = hlsl_ir_load(swizzle->val.node); ++ struct hlsl_type *type = deref->var->data_type; ++ bool found = false; ++ unsigned int i; + +- if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node)) +- return true; ++ *base = 0; + +- if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) +- return true; ++ for (i = 0; i < deref->path_len; ++i) ++ { ++ struct hlsl_ir_node *path_node = deref->path[i].node; ++ struct hlsl_type *next_type; + +- return false; +-} ++ VKD3D_ASSERT(path_node); + +-static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, +- struct hlsl_deref *deref, struct copy_propagation_state *state, unsigned int time) +-{ +- struct copy_propagation_value *value; +- struct hlsl_ir_load *load; +- unsigned int start, count; ++ /* We should always have generated a cast to UINT. */ ++ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + +- if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) +- return false; +- VKD3D_ASSERT(count == 1); ++ next_type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + +- if (!(value = copy_propagation_get_value(state, deref->var, start, time))) +- return false; +- VKD3D_ASSERT(value->component == 0); ++ if (path_node->type != HLSL_IR_CONSTANT) ++ { ++ if (found) ++ return false; ++ found = true; ++ *idx = i; ++ *scale = hlsl_type_component_count(next_type); ++ *count = hlsl_type_element_count(type); ++ } ++ else ++ { ++ unsigned int index; + +- /* Only HLSL_IR_LOAD can produce an object. */ +- load = hlsl_ir_load(value->node); ++ if (!component_index_from_deref_path_node(path_node, type, &index)) ++ return false; ++ *base += index; ++ } + +- /* As we are replacing the instruction's deref (with the one in the hlsl_ir_load) and not the +- * instruction itself, we won't be able to rely on the value retrieved by +- * copy_propagation_get_value() for the new deref in subsequent iterations of copy propagation. +- * This is because another value may be written to that deref between the hlsl_ir_load and +- * this instruction. +- * +- * For this reason, we only replace the new deref when it corresponds to a uniform variable, +- * which cannot be written to. +- * +- * In a valid shader, all object references must resolve statically to a single uniform object. +- * If this is the case, we can expect copy propagation on regular store/loads and the other +- * compilation passes to replace all hlsl_ir_loads with loads to uniform objects, so this +- * implementation is complete, even with this restriction. +- */ +- if (!load->src.var->is_uniform) +- { +- TRACE("Ignoring load from non-uniform object variable %s\n", load->src.var->name); +- return false; ++ type = next_type; + } + +- hlsl_cleanup_deref(deref); +- hlsl_copy_deref(ctx, deref, &load->src); +- +- return true; ++ return found; + } + +-static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx, +- struct hlsl_ir_resource_load *load, struct copy_propagation_state *state) ++static struct hlsl_ir_node *new_affine_path_index(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, ++ struct hlsl_block *block, struct hlsl_ir_node *index, int c, int d) + { +- bool progress = false; ++ struct hlsl_ir_node *c_node, *d_node, *ic, *idx; ++ bool use_uint = c >= 0 && d >= 0; + +- progress |= copy_propagation_transform_object_load(ctx, &load->resource, state, load->node.index); +- if (load->sampler.var) +- progress |= copy_propagation_transform_object_load(ctx, &load->sampler, state, load->node.index); +- return progress; +-} ++ if (!c) ++ { ++ VKD3D_ASSERT(d >= 0); + +-static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, +- struct hlsl_ir_resource_store *store, struct copy_propagation_state *state) +-{ +- bool progress = false; ++ return hlsl_block_add_uint_constant(ctx, block, d, loc); ++ } + +- progress |= copy_propagation_transform_object_load(ctx, &store->resource, state, store->node.index); +- return progress; ++ if (use_uint) ++ { ++ c_node = hlsl_block_add_uint_constant(ctx, block, c, loc); ++ d_node = hlsl_block_add_uint_constant(ctx, block, d, loc); ++ } ++ else ++ { ++ c_node = hlsl_block_add_int_constant(ctx, block, c, loc); ++ d_node = hlsl_block_add_int_constant(ctx, block, d, loc); ++ index = hlsl_block_add_cast(ctx, block, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); ++ } ++ ++ ic = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, index, c_node); ++ idx = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, ic, d_node); ++ if (!use_uint) ++ idx = hlsl_block_add_cast(ctx, block, idx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); ++ ++ return idx; + } + +-static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx, +- struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state) ++static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, ++ const struct copy_propagation_state *state, const struct hlsl_ir_load *load, ++ uint32_t swizzle, struct hlsl_ir_node *instr) ++{ ++ const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); ++ unsigned int nonconst_i = 0, base, scale, count; ++ struct hlsl_ir_node *index, *new_instr = NULL; ++ const struct hlsl_deref *deref = &load->src; ++ const struct hlsl_ir_var *var = deref->var; ++ unsigned int time = load->node.index; ++ struct hlsl_deref tmp_deref = {0}; ++ struct hlsl_ir_load *new_load; ++ struct hlsl_ir_var *x = NULL; ++ int *c = NULL, *d = NULL; ++ uint32_t ret_swizzle = 0; ++ struct hlsl_block block; ++ unsigned int path_len; ++ bool success = false; ++ int i, j, k; ++ ++ if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count)) ++ return false; ++ ++ VKD3D_ASSERT(count); ++ ++ hlsl_block_init(&block); ++ ++ index = deref->path[nonconst_i].node; ++ ++ /* Iterate over the nonconst index, and check if their values all have the form ++ * x[[c0*i + d0][c1*i + d1]...[cm*i + dm]], and determine the constants c, d. */ ++ for (i = 0; i < count; ++i) ++ { ++ unsigned int start = base + scale * i; ++ struct copy_propagation_value *value; ++ struct hlsl_ir_load *idx; ++ uint32_t cur_swizzle = 0; ++ ++ if (!(value = copy_propagation_get_value(state, var, ++ start + hlsl_swizzle_get_component(swizzle, 0), time))) ++ goto done; ++ ++ if (value->node->type != HLSL_IR_LOAD) ++ goto done; ++ idx = hlsl_ir_load(value->node); ++ ++ if (!x) ++ x = idx->src.var; ++ else if (x != idx->src.var) ++ goto done; ++ ++ if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type != VKD3D_SHADER_TYPE_VERTEX) ++ { ++ TRACE("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name); ++ goto done; ++ } ++ ++ if (i == 0) ++ { ++ path_len = idx->src.path_len; ++ ++ if (path_len) ++ { ++ if (!(c = hlsl_calloc(ctx, path_len, sizeof(c[0]))) ++ || !(d = hlsl_alloc(ctx, path_len * sizeof(d[0])))) ++ goto done; ++ } ++ ++ for (k = 0; k < path_len; ++k) ++ { ++ if (idx->src.path[k].node->type != HLSL_IR_CONSTANT) ++ goto done; ++ d[k] = hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u; ++ } ++ ++ } ++ else if (i == 1) ++ { ++ struct hlsl_type *type = idx->src.var->data_type; ++ ++ if (idx->src.path_len != path_len) ++ goto done; ++ ++ /* Calculate constants c and d based on the first two path indices. */ ++ for (k = 0; k < path_len; ++k) ++ { ++ int ix; ++ ++ if (idx->src.path[k].node->type != HLSL_IR_CONSTANT) ++ goto done; ++ ix = hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u; ++ c[k] = ix - d[k]; ++ d[k] = ix - c[k] * i; ++ ++ if (c[k] && type->class == HLSL_CLASS_STRUCT) ++ goto done; ++ ++ type = hlsl_get_element_type_from_path_index(ctx, type, idx->src.path[k].node); ++ } ++ } ++ else ++ { ++ if (idx->src.path_len != path_len) ++ goto done; ++ ++ /* Check that this load has the form x[[c0*i +d0][c1*i + d1]...[cm*i + dm]]. */ ++ for (k = 0; k < path_len; ++k) ++ { ++ if (idx->src.path[k].node->type != HLSL_IR_CONSTANT) ++ goto done; ++ if (hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u != c[k] * i + d[k]) ++ goto done; ++ } ++ } ++ ++ hlsl_swizzle_set_component(&cur_swizzle, 0, value->component); ++ ++ for (j = 1; j < instr_component_count; ++j) ++ { ++ struct copy_propagation_value *val; ++ ++ if (!(val = copy_propagation_get_value(state, var, ++ start + hlsl_swizzle_get_component(swizzle, j), time))) ++ goto done; ++ if (val->node != &idx->node) ++ goto done; ++ ++ hlsl_swizzle_set_component(&cur_swizzle, j, val->component); ++ } ++ ++ if (i == 0) ++ ret_swizzle = cur_swizzle; ++ else if (ret_swizzle != cur_swizzle) ++ goto done; ++ } ++ ++ if (!hlsl_init_deref(ctx, &tmp_deref, x, path_len)) ++ goto done; ++ ++ for (k = 0; k < path_len; ++k) ++ { ++ hlsl_src_from_node(&tmp_deref.path[k], ++ new_affine_path_index(ctx, &load->node.loc, &block, index, c[k], d[k])); ++ } ++ ++ if (!(new_load = hlsl_new_load_index(ctx, &tmp_deref, NULL, &load->node.loc))) ++ goto done; ++ new_instr = &new_load->node; ++ hlsl_block_add_instr(&block, new_instr); ++ ++ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) ++ new_instr = hlsl_block_add_swizzle(ctx, &block, ret_swizzle, instr_component_count, new_instr, &instr->loc); ++ ++ if (TRACE_ON()) ++ { ++ struct vkd3d_string_buffer buffer; ++ ++ vkd3d_string_buffer_init(&buffer); ++ ++ vkd3d_string_buffer_printf(&buffer, "Load from %s[", var->name); ++ for (j = 0; j < deref->path_len; ++j) ++ { ++ if (j == nonconst_i) ++ vkd3d_string_buffer_printf(&buffer, "[i]"); ++ else ++ vkd3d_string_buffer_printf(&buffer, "[%u]", hlsl_ir_constant(deref->path[j].node)->value.u[0].u); ++ } ++ vkd3d_string_buffer_printf(&buffer, "]%s propagated as %s[", ++ debug_hlsl_swizzle(swizzle, instr_component_count), tmp_deref.var->name); ++ for (k = 0; k < path_len; ++k) ++ { ++ if (c[k]) ++ vkd3d_string_buffer_printf(&buffer, "[i*%d + %d]", c[k], d[k]); ++ else ++ vkd3d_string_buffer_printf(&buffer, "[%d]", d[k]); ++ } ++ vkd3d_string_buffer_printf(&buffer, "]%s (i = %p).\n", ++ debug_hlsl_swizzle(ret_swizzle, instr_component_count), index); ++ ++ vkd3d_string_buffer_trace(&buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++ } ++ ++ list_move_before(&instr->entry, &block.instrs); ++ hlsl_replace_node(instr, new_instr); ++ success = true; ++ ++done: ++ hlsl_cleanup_deref(&tmp_deref); ++ hlsl_block_cleanup(&block); ++ vkd3d_free(c); ++ vkd3d_free(d); ++ return success; ++} ++ ++static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, ++ struct hlsl_ir_load *load, struct copy_propagation_state *state) ++{ ++ struct hlsl_type *type = load->node.data_type; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: ++ case HLSL_CLASS_NULL: ++ break; ++ ++ case HLSL_CLASS_MATRIX: ++ case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_STRUCT: ++ /* We can't handle complex types here. ++ * They should have been already split anyway by earlier passes, ++ * but they may not have been deleted yet. We can't rely on DCE to ++ * solve that problem for us, since we may be called on a partial ++ * block, but DCE deletes dead stores, so it needs to be able to ++ * see the whole program. */ ++ case HLSL_CLASS_ERROR: ++ return false; ++ ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_VOID: ++ vkd3d_unreachable(); ++ } ++ ++ if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) ++ return true; ++ ++ if (copy_propagation_replace_with_single_instr(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) ++ return true; ++ ++ if (copy_propagation_replace_with_deref(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) ++ return true; ++ ++ return false; ++} ++ ++static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, ++ struct hlsl_ir_swizzle *swizzle, struct copy_propagation_state *state) ++{ ++ struct hlsl_ir_load *load; ++ ++ if (swizzle->val.node->type != HLSL_IR_LOAD) ++ return false; ++ load = hlsl_ir_load(swizzle->val.node); ++ ++ if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node)) ++ return true; ++ ++ if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) ++ return true; ++ ++ if (copy_propagation_replace_with_deref(ctx, state, load, swizzle->u.vector, &swizzle->node)) ++ return true; ++ ++ return false; ++} ++ ++static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, ++ struct hlsl_deref *deref, struct copy_propagation_state *state, unsigned int time) ++{ ++ struct copy_propagation_value *value; ++ struct hlsl_ir_load *load; ++ unsigned int start, count; ++ ++ if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) ++ return false; ++ VKD3D_ASSERT(count == 1); ++ ++ if (!(value = copy_propagation_get_value(state, deref->var, start, time))) ++ return false; ++ VKD3D_ASSERT(value->component == 0); ++ ++ /* A uniform object should have never been written to. */ ++ VKD3D_ASSERT(!deref->var->is_uniform); ++ ++ /* Only HLSL_IR_LOAD can produce an object. */ ++ load = hlsl_ir_load(value->node); ++ ++ /* As we are replacing the instruction's deref (with the one in the hlsl_ir_load) and not the ++ * instruction itself, we won't be able to rely on the value retrieved by ++ * copy_propagation_get_value() for the new deref in subsequent iterations of copy propagation. ++ * This is because another value may be written to that deref between the hlsl_ir_load and ++ * this instruction. ++ * ++ * For this reason, we only replace the new deref when it corresponds to a uniform variable, ++ * which cannot be written to. ++ * ++ * In a valid shader, all object references must resolve statically to a single uniform object. ++ * If this is the case, we can expect copy propagation on regular store/loads and the other ++ * compilation passes to replace all hlsl_ir_loads with loads to uniform objects, so this ++ * implementation is complete, even with this restriction. ++ */ ++ if (!load->src.var->is_uniform) ++ { ++ TRACE("Ignoring load from non-uniform object variable %s\n", load->src.var->name); ++ return false; ++ } ++ ++ hlsl_cleanup_deref(deref); ++ hlsl_copy_deref(ctx, deref, &load->src); ++ ++ return true; ++} ++ ++static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx, ++ struct hlsl_ir_resource_load *load, struct copy_propagation_state *state) ++{ ++ bool progress = false; ++ ++ progress |= copy_propagation_transform_object_load(ctx, &load->resource, state, load->node.index); ++ if (load->sampler.var) ++ progress |= copy_propagation_transform_object_load(ctx, &load->sampler, state, load->node.index); ++ return progress; ++} ++ ++static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, ++ struct hlsl_ir_resource_store *store, struct copy_propagation_state *state) ++{ ++ bool progress = false; ++ ++ progress |= copy_propagation_transform_object_load(ctx, &store->resource, state, store->node.index); ++ return progress; ++} ++ ++static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx, ++ struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state) + { + bool progress = false; + +@@ -2067,167 +2327,718 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct + break; + } + +- default: +- break; ++ default: ++ break; ++ } ++ } ++} ++ ++static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct copy_propagation_state *state); ++ ++static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, ++ struct copy_propagation_state *state) ++{ ++ bool progress = false; ++ ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &iff->then_block, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); ++ ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &iff->else_block, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); ++ ++ /* Ideally we'd invalidate the outer state looking at what was ++ * touched in the two inner states, but this doesn't work for ++ * loops (because we need to know what is invalidated in advance), ++ * so we need copy_propagation_invalidate_from_block() anyway. */ ++ copy_propagation_invalidate_from_block(ctx, state, &iff->then_block, iff->node.index); ++ copy_propagation_invalidate_from_block(ctx, state, &iff->else_block, iff->node.index); ++ ++ return progress; ++} ++ ++static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, ++ struct copy_propagation_state *state) ++{ ++ bool progress = false; ++ ++ copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); ++ copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index); ++ ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &loop->body, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); ++ ++ return progress; ++} ++ ++static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, ++ struct copy_propagation_state *state) ++{ ++ struct hlsl_ir_switch_case *c; ++ bool progress = false; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ { ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &c->body, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); ++ } ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ { ++ copy_propagation_invalidate_from_block(ctx, state, &c->body, s->node.index); ++ } ++ ++ return progress; ++} ++ ++static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct copy_propagation_state *state) ++{ ++ struct hlsl_ir_node *instr, *next; ++ bool progress = false; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr == state->stop) ++ { ++ state->stopped = true; ++ return progress; ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_LOAD: ++ progress |= copy_propagation_transform_load(ctx, hlsl_ir_load(instr), state); ++ break; ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ progress |= copy_propagation_transform_resource_load(ctx, hlsl_ir_resource_load(instr), state); ++ break; ++ ++ case HLSL_IR_RESOURCE_STORE: ++ progress |= copy_propagation_transform_resource_store(ctx, hlsl_ir_resource_store(instr), state); ++ break; ++ ++ case HLSL_IR_STORE: ++ copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ progress |= copy_propagation_transform_swizzle(ctx, hlsl_ir_swizzle(instr), state); ++ break; ++ ++ case HLSL_IR_IF: ++ progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); ++ break; ++ ++ case HLSL_IR_LOOP: ++ progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); ++ break; ++ ++ case HLSL_IR_SWITCH: ++ progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state); ++ break; ++ ++ case HLSL_IR_INTERLOCKED: ++ progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state); ++ ++ default: ++ break; ++ } ++ ++ if (state->stopped) ++ return progress; ++ } ++ ++ return progress; ++} ++ ++bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ struct copy_propagation_state state; ++ bool progress; ++ + if (ctx->result) + return false; + + index_instructions(block, 1); ++ ++ copy_propagation_state_init(&state, ctx); ++ ++ progress = copy_propagation_transform_block(ctx, block, &state); ++ ++ copy_propagation_state_destroy(&state); ++ ++ return progress; ++} ++ ++enum validation_result ++{ ++ DEREF_VALIDATION_OK, ++ DEREF_VALIDATION_OUT_OF_BOUNDS, ++ DEREF_VALIDATION_NOT_CONSTANT, ++}; ++ ++struct vectorize_exprs_state ++{ ++ struct vectorizable_exprs_group ++ { ++ struct hlsl_block *block; ++ struct hlsl_ir_expr *exprs[4]; ++ uint8_t expr_count, component_count; ++ } *groups; ++ size_t count, capacity; ++}; ++ ++static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b) ++{ ++ /* TODO: We can also vectorize different constants. */ ++ ++ if (a->type == HLSL_IR_SWIZZLE) ++ a = hlsl_ir_swizzle(a)->val.node; ++ if (b->type == HLSL_IR_SWIZZLE) ++ b = hlsl_ir_swizzle(b)->val.node; ++ ++ return a == b; ++} ++ ++static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b) ++{ ++ if (a->op != b->op) ++ return false; ++ ++ for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j) ++ { ++ if (!a->operands[j].node) ++ break; ++ if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node)) ++ return false; ++ } ++ ++ return true; ++} ++ ++static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state) ++{ ++ if (expr->node.data_type->class > HLSL_CLASS_VECTOR) ++ return; ++ ++ /* These are the only current ops that are not per-component. */ ++ if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED ++ || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD) ++ return; ++ ++ for (size_t i = 0; i < state->count; ++i) ++ { ++ struct vectorizable_exprs_group *group = &state->groups[i]; ++ struct hlsl_ir_expr *other = group->exprs[0]; ++ ++ /* These are SSA instructions, which means they have the same value ++ * regardless of what block they're in. However, being in different ++ * blocks may mean that one expression or the other is not always ++ * executed. */ ++ ++ if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4 ++ && group->block == block ++ && is_same_vectorizable_expr(expr, other)) ++ { ++ group->exprs[group->expr_count++] = expr; ++ group->component_count += expr->node.data_type->e.numeric.dimx; ++ return; ++ } ++ } ++ ++ if (!hlsl_array_reserve(ctx, (void **)&state->groups, ++ &state->capacity, state->count + 1, sizeof(*state->groups))) ++ return; ++ state->groups[state->count].block = block; ++ state->groups[state->count].exprs[0] = expr; ++ state->groups[state->count].expr_count = 1; ++ state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx; ++ ++state->count; ++} ++ ++static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct vectorize_exprs_state *state) ++{ ++ struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->type == HLSL_IR_EXPR) ++ { ++ record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state); ++ } ++ else if (instr->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ find_vectorizable_expr_groups(ctx, &iff->then_block, state); ++ find_vectorizable_expr_groups(ctx, &iff->else_block, state); ++ } ++ else if (instr->type == HLSL_IR_LOOP) ++ { ++ find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state); ++ } ++ else if (instr->type == HLSL_IR_SWITCH) ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ find_vectorizable_expr_groups(ctx, &c->body, state); ++ } ++ } ++} ++ ++/* Combine sequences like ++ * ++ * 3: @1.x ++ * 4: @2.x ++ * 5: @3 * @4 ++ * 6: @1.y ++ * 7: @2.x ++ * 8: @6 * @7 ++ * ++ * into ++ * ++ * 5_1: @1.xy ++ * 5_2: @2.xx ++ * 5_3: @5_1 * @5_2 ++ * 5: @5_3.x ++ * 8: @5_3.y ++ * ++ * Each operand to an expression needs to refer to the same ultimate source ++ * (in this case @1 and @2 respectively), but can be a swizzle thereof. ++ * ++ * In practice the swizzles @5 and @8 can generally then be vectorized again, ++ * either as part of another expression, or as part of a store. ++ */ ++static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ struct vectorize_exprs_state state = {0}; ++ bool progress = false; ++ ++ find_vectorizable_expr_groups(ctx, block, &state); ++ ++ for (unsigned int i = 0; i < state.count; ++i) ++ { ++ struct vectorizable_exprs_group *group = &state.groups[i]; ++ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; ++ uint32_t swizzles[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *arg, *combined; ++ unsigned int component_count = 0; ++ struct hlsl_type *combined_type; ++ struct hlsl_block new_block; ++ struct hlsl_ir_expr *expr; ++ ++ if (group->expr_count == 1) ++ continue; ++ ++ hlsl_block_init(&new_block); ++ ++ for (unsigned int j = 0; j < group->expr_count; ++j) ++ { ++ expr = group->exprs[j]; ++ ++ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) ++ { ++ uint32_t arg_swizzle; ++ ++ if (!(arg = expr->operands[a].node)) ++ break; ++ ++ if (arg->type == HLSL_IR_SWIZZLE) ++ arg_swizzle = hlsl_ir_swizzle(arg)->u.vector; ++ else ++ arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W); ++ ++ /* Mask out the invalid components. */ ++ arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1; ++ swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count); ++ } ++ ++ component_count += expr->node.data_type->e.numeric.dimx; ++ } ++ ++ expr = group->exprs[0]; ++ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) ++ { ++ if (!(arg = expr->operands[a].node)) ++ break; ++ if (arg->type == HLSL_IR_SWIZZLE) ++ arg = hlsl_ir_swizzle(arg)->val.node; ++ args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc); ++ } ++ ++ combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count); ++ combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc); ++ ++ list_move_before(&expr->node.entry, &new_block.instrs); ++ ++ TRACE("Combining %u %s instructions into %p.\n", group->expr_count, ++ debug_hlsl_expr_op(group->exprs[0]->op), combined); ++ ++ component_count = 0; ++ for (unsigned int j = 0; j < group->expr_count; ++j) ++ { ++ struct hlsl_ir_node *replacement; ++ ++ expr = group->exprs[j]; ++ ++ if (!(replacement = hlsl_new_swizzle(ctx, ++ HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count), ++ expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc))) ++ goto out; ++ component_count += expr->node.data_type->e.numeric.dimx; ++ list_add_before(&expr->node.entry, &replacement->entry); ++ hlsl_replace_node(&expr->node, replacement); + } ++ ++ progress = true; + } ++ ++out: ++ vkd3d_free(state.groups); ++ return progress; + } - copy_propagation_state_init(&state, ctx); +-static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, +- struct copy_propagation_state *state); ++struct vectorize_stores_state ++{ ++ struct vectorizable_stores_group ++ { ++ struct hlsl_block *block; ++ /* We handle overlapping stores, because it's not really easier not to. ++ * In theory, then, we could collect an arbitrary number of stores here. ++ * ++ * In practice, overlapping stores are unlikely, and of course at most ++ * 4 stores can appear without overlap. Therefore, for simplicity, we ++ * just use a fixed array of 4. ++ * ++ * Since computing the writemask requires traversing the deref, and we ++ * need to do that anyway, we store it here for convenience. */ ++ struct hlsl_ir_store *stores[4]; ++ unsigned int path_len; ++ uint8_t writemasks[4]; ++ uint8_t store_count; ++ bool dirty; ++ } *groups; ++ size_t count, capacity; ++}; + +-static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, +- struct copy_propagation_state *state) ++/* This must be a store to a subsection of a vector. ++ * In theory we can also vectorize stores to packed struct fields, ++ * but this requires target-specific knowledge and is probably best left ++ * to a VSIR pass. */ ++static bool can_vectorize_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, ++ unsigned int *path_len, uint8_t *writemask) + { +- bool progress = false; ++ struct hlsl_type *type = store->lhs.var->data_type; ++ unsigned int i; + +- copy_propagation_push_scope(state, ctx); +- progress |= copy_propagation_transform_block(ctx, &iff->then_block, state); +- if (state->stopped) +- return progress; +- copy_propagation_pop_scope(state); ++ if (store->rhs.node->data_type->class > HLSL_CLASS_VECTOR) ++ return false; + +- copy_propagation_push_scope(state, ctx); +- progress |= copy_propagation_transform_block(ctx, &iff->else_block, state); +- if (state->stopped) +- return progress; +- copy_propagation_pop_scope(state); ++ if (type->class == HLSL_CLASS_SCALAR) ++ return false; + +- /* Ideally we'd invalidate the outer state looking at what was +- * touched in the two inner states, but this doesn't work for +- * loops (because we need to know what is invalidated in advance), +- * so we need copy_propagation_invalidate_from_block() anyway. */ +- copy_propagation_invalidate_from_block(ctx, state, &iff->then_block, iff->node.index); +- copy_propagation_invalidate_from_block(ctx, state, &iff->else_block, iff->node.index); ++ for (i = 0; type->class != HLSL_CLASS_VECTOR && i < store->lhs.path_len; ++i) ++ type = hlsl_get_element_type_from_path_index(ctx, type, store->lhs.path[i].node); + +- return progress; ++ if (type->class != HLSL_CLASS_VECTOR) ++ return false; ++ ++ *path_len = i; ++ ++ if (i < store->lhs.path_len) ++ { ++ struct hlsl_ir_constant *c; ++ ++ /* This is a store to a scalar component of a vector, achieved via ++ * indexing. */ ++ ++ if (store->lhs.path[i].node->type != HLSL_IR_CONSTANT) ++ return false; ++ c = hlsl_ir_constant(store->lhs.path[i].node); ++ *writemask = (1u << c->value.u[0].u); ++ } ++ else ++ { ++ *writemask = store->writemask; ++ } ++ ++ return true; + } + +-static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, +- struct copy_propagation_state *state) ++static bool derefs_are_same_vector(struct hlsl_ctx *ctx, const struct hlsl_deref *a, const struct hlsl_deref *b) + { +- bool progress = false; ++ struct hlsl_type *type = a->var->data_type; + +- copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); +- copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index); ++ if (a->var != b->var) ++ return false; + +- copy_propagation_push_scope(state, ctx); +- progress |= copy_propagation_transform_block(ctx, &loop->body, state); +- if (state->stopped) +- return progress; +- copy_propagation_pop_scope(state); ++ for (unsigned int i = 0; type->class != HLSL_CLASS_VECTOR && i < a->path_len && i < b->path_len; ++i) ++ { ++ if (a->path[i].node != b->path[i].node) ++ return false; ++ type = hlsl_get_element_type_from_path_index(ctx, type, a->path[i].node); ++ } + +- return progress; ++ return true; + } + +-static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, +- struct copy_propagation_state *state) ++static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_store *store, struct vectorize_stores_state *state) + { +- struct hlsl_ir_switch_case *c; +- bool progress = false; ++ unsigned int path_len; ++ uint8_t writemask; + +- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ if (!can_vectorize_store(ctx, store, &path_len, &writemask)) + { +- copy_propagation_push_scope(state, ctx); +- progress |= copy_propagation_transform_block(ctx, &c->body, state); +- if (state->stopped) +- return progress; +- copy_propagation_pop_scope(state); ++ /* In the case of a dynamically indexed vector, we must invalidate ++ * any groups that statically index the same vector. ++ * For the sake of expediency, we go one step further and invalidate ++ * any groups that store to the same variable. ++ * (We also don't check that that was the reason why this store isn't ++ * vectorizable.) ++ * We could be more granular, but we'll defer that until it comes ++ * up in practice. */ ++ for (size_t i = 0; i < state->count; ++i) ++ { ++ if (state->groups[i].stores[0]->lhs.var == store->lhs.var) ++ state->groups[i].dirty = true; ++ } ++ return; + } + +- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ for (size_t i = 0; i < state->count; ++i) + { +- copy_propagation_invalidate_from_block(ctx, state, &c->body, s->node.index); ++ struct vectorizable_stores_group *group = &state->groups[i]; ++ struct hlsl_ir_store *other = group->stores[0]; ++ ++ if (group->dirty) ++ continue; ++ ++ if (derefs_are_same_vector(ctx, &store->lhs, &other->lhs)) ++ { ++ /* Stores must be in the same CFG block. If they're not, ++ * they're not executed in exactly the same flow, and ++ * therefore can't be vectorized. */ ++ if (group->block == block ++ && is_same_vectorizable_source(store->rhs.node, other->rhs.node)) ++ { ++ if (group->store_count < ARRAY_SIZE(group->stores)) ++ { ++ group->stores[group->store_count] = store; ++ group->writemasks[group->store_count] = writemask; ++ ++group->store_count; ++ return; ++ } ++ } ++ else ++ { ++ /* A store to the same vector with a different source, or in ++ * a different CFG block, invalidates any earlier store. ++ * ++ * A store to a component which *contains* the vector in ++ * question would also invalidate, but we should have split all ++ * of those by the time we get here. */ ++ group->dirty = true; ++ ++ /* Note that we do exit this loop early if we find a store A we ++ * can vectorize with, but that's fine. If there was a store B ++ * also in the state that we can't vectorize with, it would ++ * already have invalidated A. */ ++ } ++ } ++ else ++ { ++ /* This could still be a store to the same vector, if e.g. the ++ * vector is part of a dynamically indexed array, or the path has ++ * two equivalent instructions which refer to the same component. ++ * [CSE may help with the latter, but we don't have it yet, ++ * and we shouldn't depend on it anyway.] ++ * For the sake of expediency, we just invalidate it if it refers ++ * to the same variable at all. ++ * As above, we could be more granular, but we'll defer that until ++ * it comes up in practice. */ ++ if (store->lhs.var == other->lhs.var) ++ group->dirty = true; ++ ++ /* As above, we don't need to worry about exiting the loop early. */ ++ } + } + +- return progress; ++ if (!hlsl_array_reserve(ctx, (void **)&state->groups, ++ &state->capacity, state->count + 1, sizeof(*state->groups))) ++ return; ++ state->groups[state->count].block = block; ++ state->groups[state->count].stores[0] = store; ++ state->groups[state->count].path_len = path_len; ++ state->groups[state->count].writemasks[0] = writemask; ++ state->groups[state->count].store_count = 1; ++ state->groups[state->count].dirty = false; ++ ++state->count; + } + +-static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block, +- struct copy_propagation_state *state) ++static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct vectorize_stores_state *state) + { +- struct hlsl_ir_node *instr, *next; +- bool progress = false; ++ struct hlsl_ir_node *instr; + +- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { +- if (instr == state->stop) ++ if (instr->type == HLSL_IR_STORE) + { +- state->stopped = true; +- return progress; ++ record_vectorizable_store(ctx, block, hlsl_ir_store(instr), state); + } ++ else if (instr->type == HLSL_IR_LOAD) ++ { ++ struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var; + +- switch (instr->type) ++ /* By vectorizing store A with store B, we are effectively moving ++ * store A down to happen at the same time as store B. ++ * If there was a load of the same variable between the two, this ++ * would be incorrect. ++ * Therefore invalidate all stores to this variable. As above, we ++ * could be more granular if necessary. */ ++ ++ for (unsigned int i = 0; i < state->count; ++i) ++ { ++ if (state->groups[i].stores[0]->lhs.var == var) ++ state->groups[i].dirty = true; ++ } ++ } ++ else if (instr->type == HLSL_IR_IF) + { +- case HLSL_IR_LOAD: +- progress |= copy_propagation_transform_load(ctx, hlsl_ir_load(instr), state); +- break; ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); + +- case HLSL_IR_RESOURCE_LOAD: +- progress |= copy_propagation_transform_resource_load(ctx, hlsl_ir_resource_load(instr), state); +- break; ++ find_vectorizable_store_groups(ctx, &iff->then_block, state); ++ find_vectorizable_store_groups(ctx, &iff->else_block, state); ++ } ++ else if (instr->type == HLSL_IR_LOOP) ++ { ++ find_vectorizable_store_groups(ctx, &hlsl_ir_loop(instr)->body, state); ++ } ++ else if (instr->type == HLSL_IR_SWITCH) ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; + +- case HLSL_IR_RESOURCE_STORE: +- progress |= copy_propagation_transform_resource_store(ctx, hlsl_ir_resource_store(instr), state); +- break; ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ find_vectorizable_store_groups(ctx, &c->body, state); ++ } ++ } ++} + +- case HLSL_IR_STORE: +- copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); +- break; ++/* Combine sequences like ++ * ++ * 2: @1.yw ++ * 3: @1.zy ++ * 4: var.xy = @2 ++ * 5: var.yw = @3 ++ * ++ * to ++ * ++ * 2: @1.yzy ++ * 5: var.xyw = @2 ++ * ++ * There are a lot of gotchas here. We need to make sure the two stores are to ++ * the same vector (which may be embedded in a complex variable), that they're ++ * always executed in the same control flow, and that there aren't any other ++ * stores or loads on the same vector in the middle. */ ++static bool vectorize_stores(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ struct vectorize_stores_state state = {0}; ++ bool progress = false; + +- case HLSL_IR_SWIZZLE: +- progress |= copy_propagation_transform_swizzle(ctx, hlsl_ir_swizzle(instr), state); +- break; ++ find_vectorizable_store_groups(ctx, block, &state); + +- case HLSL_IR_IF: +- progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); +- break; ++ for (unsigned int i = 0; i < state.count; ++i) ++ { ++ struct vectorizable_stores_group *group = &state.groups[i]; ++ uint32_t new_swizzle = 0, new_writemask = 0; ++ struct hlsl_ir_node *new_rhs, *value; ++ uint32_t swizzle_components[4]; ++ unsigned int component_count; ++ struct hlsl_ir_store *store; ++ struct hlsl_block new_block; + +- case HLSL_IR_LOOP: +- progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state); +- break; ++ if (group->store_count == 1) ++ continue; + +- case HLSL_IR_SWITCH: +- progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state); +- break; ++ hlsl_block_init(&new_block); + +- case HLSL_IR_INTERLOCKED: +- progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state); ++ /* Compute the swizzle components. */ ++ for (unsigned int j = 0; j < group->store_count; ++j) ++ { ++ unsigned int writemask = group->writemasks[j]; ++ uint32_t rhs_swizzle; + +- default: +- break; ++ store = group->stores[j]; ++ ++ if (store->rhs.node->type == HLSL_IR_SWIZZLE) ++ rhs_swizzle = hlsl_ir_swizzle(store->rhs.node)->u.vector; ++ else ++ rhs_swizzle = HLSL_SWIZZLE(X, Y, Z, W); ++ ++ component_count = 0; ++ for (unsigned int k = 0; k < 4; ++k) ++ { ++ if (writemask & (1u << k)) ++ swizzle_components[k] = hlsl_swizzle_get_component(rhs_swizzle, component_count++); ++ } ++ ++ new_writemask |= writemask; + } + +- if (state->stopped) +- return progress; +- } ++ /* Construct the new swizzle. */ ++ component_count = 0; ++ for (unsigned int k = 0; k < 4; ++k) ++ { ++ if (new_writemask & (1u << k)) ++ hlsl_swizzle_set_component(&new_swizzle, component_count++, swizzle_components[k]); ++ } + +- return progress; +-} ++ store = group->stores[0]; ++ value = store->rhs.node; ++ if (value->type == HLSL_IR_SWIZZLE) ++ value = hlsl_ir_swizzle(value)->val.node; + +-bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +-{ +- struct copy_propagation_state state; +- bool progress; ++ new_rhs = hlsl_block_add_swizzle(ctx, &new_block, new_swizzle, component_count, value, &value->loc); ++ hlsl_block_add_store_parent(ctx, &new_block, &store->lhs, ++ group->path_len, new_rhs, new_writemask, &store->node.loc); + +- index_instructions(block, 2); ++ TRACE("Combining %u stores to %s.\n", group->store_count, store->lhs.var->name); + +- copy_propagation_state_init(&state, ctx); ++ list_move_before(&group->stores[group->store_count - 1]->node.entry, &new_block.instrs); + +- progress = copy_propagation_transform_block(ctx, block, &state); ++ for (unsigned int j = 0; j < group->store_count; ++j) ++ { ++ list_remove(&group->stores[j]->node.entry); ++ hlsl_free_instr(&group->stores[j]->node); ++ } + +- copy_propagation_state_destroy(&state); ++ progress = true; ++ } + ++ vkd3d_free(state.groups); + return progress; + } + +-enum validation_result +-{ +- DEREF_VALIDATION_OK, +- DEREF_VALIDATION_OUT_OF_BOUNDS, +- DEREF_VALIDATION_NOT_CONSTANT, +-}; +- + static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, + const struct hlsl_deref *deref) + { +@@ -2244,8 +3055,7 @@ static enum validation_result validate_component_index_range_from_deref(struct h + return DEREF_VALIDATION_NOT_CONSTANT; + + /* We should always have generated a cast to UINT. */ +- VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR +- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + +@@ -2402,11 +3212,6 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + return false; + } + +-static bool is_vec1(const struct hlsl_type *type) +-{ +- return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); +-} +- + static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + if (instr->type == HLSL_IR_EXPR) +@@ -2421,7 +3226,8 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + src_type = expr->operands[0].node->data_type; + + if (hlsl_types_are_equal(src_type, dst_type) +- || (src_type->e.numeric.type == dst_type->e.numeric.type && is_vec1(src_type) && is_vec1(dst_type))) ++ || (src_type->e.numeric.type == dst_type->e.numeric.type ++ && hlsl_is_vec1(src_type) && hlsl_is_vec1(dst_type))) + { + hlsl_replace_node(&expr->node, expr->operands[0].node); + return true; +@@ -2584,20 +3390,14 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR + && dst_type->e.numeric.dimx < src_type->e.numeric.dimx) + { +- struct hlsl_ir_node *new_cast, *swizzle; ++ struct hlsl_ir_node *new_cast; -@@ -2589,9 +2512,7 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx); /* We need to preserve the cast since it might be doing more than just * narrowing the vector. */ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) - return false; - hlsl_block_add_instr(block, new_cast); +- +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), +- dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) +- return false; +- hlsl_block_add_instr(block, swizzle); +- + new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_vector_type, &cast->node.loc); ++ hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, Y, Z, W), ++ dst_type->e.numeric.dimx, new_cast, &cast->node.loc); + return true; + } - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), - dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) -@@ -2768,16 +2689,9 @@ static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *in +@@ -2768,16 +3568,9 @@ static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *in } else { @@ -14887,7 +19055,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } list_add_tail(&s->cases, &def->entry); -@@ -2808,7 +2722,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2808,7 +3601,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) { @@ -14896,7 +19064,18 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 unsigned int width = type->e.numeric.dimx; struct hlsl_constant_value value; struct hlsl_ir_load *vector_load; -@@ -2832,14 +2746,9 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2818,9 +3611,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + return false; + hlsl_block_add_instr(block, &vector_load->node); + +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, swizzle); ++ swizzle = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc); + + value.u[0].u = 0; + value.u[1].u = 1; +@@ -2832,14 +3623,9 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir operands[0] = swizzle; operands[1] = c; @@ -14914,7 +19093,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 op = HLSL_OP2_DOT; if (width == 1) -@@ -2849,10 +2758,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2849,10 +3635,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir * LOGIC_OR + LOGIC_AND. */ operands[0] = &vector_load->node; operands[1] = eq; @@ -14926,7 +19105,19 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -2901,7 +2807,7 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n +@@ -2891,6 +3674,11 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc + return false; + } + ++static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) ++{ ++ return ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && deref->var->is_uniform; ++} ++ + /* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant + * index into multiple constant loads, where the value of only one of them ends up in the resulting + * node. +@@ -2901,7 +3689,7 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n struct hlsl_block *block) { struct hlsl_constant_value zero_value = {0}; @@ -14935,7 +19126,17 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 unsigned int i, i_cut, element_count; const struct hlsl_deref *deref; struct hlsl_type *cut_type; -@@ -2944,70 +2850,47 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n +@@ -2917,6 +3705,9 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n + if (deref->path_len == 0) + return false; + ++ if (deref_supports_sm1_indirect_addressing(ctx, deref)) ++ return false; ++ + for (i = deref->path_len - 1; ; --i) + { + if (deref->path[i].node->type != HLSL_IR_CONSTANT) +@@ -2944,70 +3735,44 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; hlsl_block_add_instr(block, zero); @@ -14966,12 +19167,14 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc))) - return false; - hlsl_block_add_instr(block, equals); +- +- if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), +- var->data_type->e.numeric.dimx, equals, &cut_index->loc))) +- return false; +- hlsl_block_add_instr(block, equals); + equals = hlsl_block_add_expr(ctx, block, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc); - - if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), - var->data_type->e.numeric.dimx, equals, &cut_index->loc))) - return false; - hlsl_block_add_instr(block, equals); ++ equals = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), ++ var->data_type->e.numeric.dimx, equals, &cut_index->loc); - if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc))) - return false; @@ -15017,16 +19220,77 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -3305,7 +3188,7 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -3278,9 +4043,37 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) + return false; + } + +-/* Turn CAST to int or uint as follows: ++/* Turn CAST to int or uint into TRUNC + REINTERPRET */ ++static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; ++ struct hlsl_ir_node *arg, *trunc; ++ struct hlsl_ir_expr *expr; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP1_CAST) ++ return false; ++ ++ arg = expr->operands[0].node; ++ if (!hlsl_type_is_integer(instr->data_type) || instr->data_type->e.numeric.type == HLSL_TYPE_BOOL) ++ return false; ++ if (!hlsl_type_is_floating_point(arg->data_type)) ++ return false; ++ ++ trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, arg, &instr->loc); ++ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = trunc; ++ hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); ++ ++ return true; ++} ++ ++/* Turn TRUNC into: + * +- * CAST(x) = x - FRACT(x) + extra ++ * TRUNC(x) = x - FRACT(x) + extra + * + * where + * +@@ -3288,27 +4081,19 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) + * + * where the comparisons in the extra term are performed using CMP or SLT + * depending on whether this is a pixel or vertex shader, respectively. +- * +- * A REINTERPET (which is written as a mere MOV) is also applied to the final +- * result for type consistency. + */ +-static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { +- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_node *arg, *res; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); +- if (expr->op != HLSL_OP1_CAST) ++ if (expr->op != HLSL_OP1_TRUNC) return false; arg = expr->operands[0].node; - if (instr->data_type->e.numeric.type != HLSL_TYPE_INT && instr->data_type->e.numeric.type != HLSL_TYPE_UINT) -+ if (!hlsl_type_is_integer(instr->data_type) || instr->data_type->e.numeric.type == HLSL_TYPE_BOOL) - return false; - if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) - return false; -@@ -3328,13 +3211,8 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +- return false; +- if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) +- return false; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { +@@ -3328,13 +4113,8 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_block_add_instr(block, one); @@ -15042,7 +19306,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one))) return false; -@@ -3344,41 +3222,19 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -3344,52 +4124,63 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_block_add_instr(block, extra); @@ -15063,26 +19327,6 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - if (!(neg_arg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg_arg); -- -- if (!(is_neg = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg, neg_arg))) -- return false; -- hlsl_block_add_instr(block, is_neg); -- -- if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, fract); -- -- if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, neg_fract); -- -- if (!(has_fract = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg_fract, fract))) -- return false; -- hlsl_block_add_instr(block, has_fract); -- -- if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) -- return false; -- hlsl_block_add_instr(block, floor); + neg_arg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc); + is_neg = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, arg, neg_arg); + fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc); @@ -15090,20 +19334,75 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 + has_fract = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, neg_fract, fract); + floor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, neg_fract); - if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) +- if (!(is_neg = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg, neg_arg))) ++ if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) return false; -@@ -3387,18 +3243,17 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +- hlsl_block_add_instr(block, is_neg); ++ hlsl_block_add_instr(block, res); ++ } - memset(operands, 0, sizeof(operands)); - operands[0] = res; +- if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, fract); ++ return true; ++} + +- if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, neg_fract); ++/* Lower modulus using: ++ * ++ * mod(x, y) = x - trunc(x / y) * y; ++ * ++ */ ++static bool lower_int_modulus_sm1(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *div, *trunc, *mul, *neg, *operands[2], *ret; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ bool is_float; + +- if (!(has_fract = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg_fract, fract))) +- return false; +- hlsl_block_add_instr(block, has_fract); ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP2_MOD) ++ return false; + +- if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) +- return false; +- hlsl_block_add_instr(block, floor); ++ is_float = instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT ++ || instr->data_type->e.numeric.type == HLSL_TYPE_HALF; ++ if (is_float) ++ return false; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); + +- if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor))) +- return false; +- hlsl_block_add_instr(block, res); ++ for (unsigned int i = 0; i < 2; ++i) ++ { ++ operands[i] = hlsl_block_add_cast(ctx, block, expr->operands[i].node, float_type, &instr->loc); + } + +- memset(operands, 0, sizeof(operands)); +- operands[0] = res; - if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, res); -- -+ hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); ++ div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, operands[0], operands[1]); ++ trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, div, &instr->loc); ++ mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, trunc, operands[1]); ++ neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul, &instr->loc); ++ ret = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, operands[0], neg); ++ hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc); + return true; } - +@@ -3397,8 +4188,10 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15115,7 +19414,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (instr->type != HLSL_IR_EXPR) return false; -@@ -3406,13 +3261,21 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, str +@@ -3406,13 +4199,21 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, str if (expr->op != HLSL_OP2_DIV) return false; @@ -15143,7 +19442,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -3420,8 +3283,8 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, str +@@ -3420,8 +4221,8 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, str /* Lower SQRT to RSQ + RCP. */ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15153,7 +19452,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (instr->type != HLSL_IR_EXPR) return false; -@@ -3429,20 +3292,15 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3429,20 +4230,15 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct if (expr->op != HLSL_OP1_SQRT) return false; @@ -15177,7 +19476,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -3459,22 +3317,15 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -3459,37 +4255,22 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; @@ -15199,23 +19498,31 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, expr->operands[0].node, expr->operands[1].node))) - return false; - hlsl_block_add_instr(block, mul); +- +- if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), +- instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) +- return false; +- hlsl_block_add_instr(block, add_x); +- +- if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), +- instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) +- return false; +- hlsl_block_add_instr(block, add_y); + mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, expr->operands[0].node, expr->operands[1].node); - if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), - instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) -@@ -3486,10 +3337,8 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h - return false; - hlsl_block_add_instr(block, add_y); - - if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, add_x, add_y))) - return false; ++ add_x = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), ++ instr->data_type->e.numeric.dimx, mul, &expr->node.loc); ++ add_y = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(Y, Y, Y, Y), ++ instr->data_type->e.numeric.dimx, mul, &expr->node.loc); + hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, add_x, add_y); } - hlsl_block_add_instr(block, replacement); return true; } -@@ -3497,7 +3346,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -3497,7 +4278,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h /* Lower ABS to MAX */ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15224,7 +19531,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -3507,21 +3356,15 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -3507,21 +4288,15 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h if (expr->op != HLSL_OP1_ABS) return false; @@ -15249,7 +19556,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_type *type = instr->data_type; struct hlsl_constant_value half_value; unsigned int i, component_count; -@@ -3542,29 +3385,17 @@ static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3542,29 +4317,17 @@ static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return false; hlsl_block_add_instr(block, half); @@ -15284,7 +19591,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -3575,25 +3406,16 @@ static bool lower_ceil(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3575,25 +4338,16 @@ static bool lower_ceil(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct if (expr->op != HLSL_OP1_CEIL) return false; @@ -15314,7 +19621,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -3604,18 +3426,9 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3604,18 +4358,9 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct if (expr->op != HLSL_OP1_FLOOR) return false; @@ -15336,7 +19643,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -3667,25 +3480,20 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3667,33 +4412,26 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half))) return false; hlsl_block_add_instr(block, mad); @@ -15364,7 +19671,16 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 for (i = 0; i < type->e.numeric.dimx; ++i) { -@@ -3702,20 +3510,11 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + uint32_t s = hlsl_swizzle_from_writemask(1 << i); + +- if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, comps[i]); ++ comps[i] = hlsl_block_add_swizzle(ctx, block, s, 1, reduced, &instr->loc); + } + + if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) +@@ -3702,20 +4440,11 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct for (i = 0; i < type->e.numeric.dimx; ++i) { @@ -15388,7 +19704,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } return true; -@@ -3723,8 +3522,8 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -3723,8 +4452,8 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15398,7 +19714,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_constant_value one_value; struct hlsl_type *float_type; struct hlsl_ir_expr *expr; -@@ -3741,13 +3540,9 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st +@@ -3741,13 +4470,9 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); @@ -15414,7 +19730,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 one_value.u[0].f = 1.0; one_value.u[1].f = 1.0; -@@ -3757,24 +3552,19 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st +@@ -3757,24 +4482,19 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st return false; hlsl_block_add_instr(block, one); @@ -15442,7 +19758,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; struct hlsl_type *type; -@@ -3799,23 +3589,14 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -3799,23 +4519,14 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy); @@ -15469,7 +19785,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -3867,7 +3648,7 @@ static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node * +@@ -3867,7 +4578,7 @@ static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node * static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15478,7 +19794,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; struct hlsl_type *float_type; struct hlsl_ir_expr *expr; -@@ -3884,13 +3665,8 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node +@@ -3884,13 +4595,8 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node arg2 = expr->operands[1].node; float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); @@ -15494,7 +19810,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 switch (expr->op) { -@@ -3899,36 +3675,21 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node +@@ -3899,36 +4605,21 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node { struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; @@ -15537,7 +19853,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 negate = (expr->op == HLSL_OP2_EQUAL); break; } -@@ -3936,10 +3697,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node +@@ -3936,10 +4627,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node case HLSL_OP2_GEQUAL: case HLSL_OP2_LESS: { @@ -15549,7 +19865,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 negate = (expr->op == HLSL_OP2_GEQUAL); break; } -@@ -3961,13 +3719,8 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node +@@ -3961,13 +4649,8 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node return false; hlsl_block_add_instr(block, one); @@ -15565,7 +19881,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } else { -@@ -3978,10 +3731,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node +@@ -3978,10 +4661,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node * and casts to BOOL have already been lowered to "!= 0". */ memset(operands, 0, sizeof(operands)); operands[0] = res; @@ -15577,7 +19893,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -4010,21 +3760,10 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -4010,21 +4690,10 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h arg2 = expr->operands[1].node; float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); @@ -15603,7 +19919,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 memset(&zero_value, 0, sizeof(zero_value)); if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) -@@ -4056,7 +3795,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -4056,7 +4725,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h */ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15612,7 +19928,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_constant_value zero_value, one_value; struct hlsl_type *float_type; struct hlsl_ir_expr *expr; -@@ -4073,10 +3812,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -4073,10 +4742,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h for (i = 0; i < 3; ++i) { args[i] = expr->operands[i].node; @@ -15624,7 +19940,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } memset(&zero_value, 0, sizeof(zero_value)); -@@ -4092,30 +3828,12 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h +@@ -4092,30 +4758,12 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h return false; hlsl_block_add_instr(block, one); @@ -15661,7 +19977,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -4145,10 +3863,8 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -4145,10 +4793,8 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; hlsl_block_add_instr(block, zero); @@ -15673,7 +19989,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -4158,7 +3874,6 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -4158,7 +4804,6 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc { struct hlsl_type *cond_type = condition->data_type; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; @@ -15681,7 +19997,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); -@@ -4166,20 +3881,13 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc +@@ -4166,23 +4811,16 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc { cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); @@ -15703,8 +20019,12 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 + return hlsl_block_add_expr(ctx, instrs, HLSL_OP3_TERNARY, operands, if_true->data_type, &condition->loc); } - static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -@@ -4203,9 +3911,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +-static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++static bool lower_int_division_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; + struct hlsl_type *type = instr->data_type, *utype; +@@ -4203,9 +4841,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); @@ -15715,7 +20035,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 for (i = 0; i < type->e.numeric.dimx; ++i) high_bit_value.u[i].u = 0x80000000; -@@ -4213,38 +3919,14 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -4213,42 +4849,18 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_block_add_instr(block, high_bit); @@ -15762,7 +20082,12 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return hlsl_add_conditional(ctx, block, and, neg, cast3); } -@@ -4275,45 +3957,21 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +-static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++static bool lower_int_modulus_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; + struct hlsl_type *type = instr->data_type, *utype; +@@ -4275,45 +4887,21 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; hlsl_block_add_instr(block, high_bit); @@ -15817,7 +20142,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -4329,14 +3987,8 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -4329,14 +4917,8 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru arg = expr->operands[0].node; @@ -15834,7 +20159,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -4355,8 +4007,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -4355,8 +4937,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru if (expr->op != HLSL_OP2_DOT) return false; @@ -15844,7 +20169,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 { arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; -@@ -4364,9 +4015,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -4364,26 +4945,18 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru dimx = arg1->data_type->e.numeric.dimx; is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; @@ -15855,7 +20180,13 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 for (i = 0; i < dimx; ++i) { -@@ -4379,11 +4028,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + uint32_t s = hlsl_swizzle_from_writemask(1 << i); + +- if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, comps[i]); ++ comps[i] = hlsl_block_add_swizzle(ctx, block, s, 1, mult, &instr->loc); + } res = comps[0]; for (i = 1; i < dimx; ++i) @@ -15868,7 +20199,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -4393,7 +4038,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -4393,125 +4966,45 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { @@ -15876,11 +20207,22 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 + struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; struct hlsl_type *type = instr->data_type, *btype; struct hlsl_constant_value one_value; - struct hlsl_ir_expr *expr; -@@ -4412,25 +4057,14 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); - +- struct hlsl_ir_expr *expr; +- unsigned int i; +- +- if (instr->type != HLSL_IR_EXPR) +- return false; +- expr = hlsl_ir_expr(instr); +- arg1 = expr->operands[0].node; +- arg2 = expr->operands[1].node; +- if (expr->op != HLSL_OP2_MOD) +- return false; +- if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) +- return false; +- if (type->e.numeric.type != HLSL_TYPE_FLOAT) +- return false; +- btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); +- - if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) - return false; - hlsl_block_add_instr(block, mul1); @@ -15888,30 +20230,25 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg1); -+ mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, arg2, arg1); -+ neg1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul1, &instr->loc); - +- - if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) - return false; -+ ge = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_GEQUAL, mul1, neg1); - ge->data_type = btype; +- ge->data_type = btype; - hlsl_block_add_instr(block, ge); - +- - if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg2); - - if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) - return false; -+ neg2 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg2, &instr->loc); -+ cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2); - - for (i = 0; i < type->e.numeric.dimx; ++i) - one_value.u[i].f = 1.0f; -@@ -4438,82 +4072,13 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - hlsl_block_add_instr(block, one); - +- +- for (i = 0; i < type->e.numeric.dimx; ++i) +- one_value.u[i].f = 1.0f; +- if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, one); +- - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) - return false; - hlsl_block_add_instr(block, div); @@ -15928,13 +20265,9 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - return false; - hlsl_block_add_instr(block, mul3); - -+ div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, one, cond); -+ mul2 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, div, arg1); -+ frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, mul2, &instr->loc); -+ hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, frc, cond); - return true; - } - +- return true; +-} +- -static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -{ - struct hlsl_ir_expr *expr; @@ -15961,41 +20294,69 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 - struct hlsl_ir_node *arg, *arg_cast, *float_expr, *ret; - struct hlsl_type *float_type; - unsigned int i; -- ++ struct hlsl_ir_expr *expr; ++ unsigned int i; + - for (i = 0; i < HLSL_MAX_OPERANDS; ++i) - { - arg = expr->operands[i].node; - if (!arg) - continue; -- ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ arg1 = expr->operands[0].node; ++ arg2 = expr->operands[1].node; ++ if (expr->op != HLSL_OP2_MOD) ++ return false; ++ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) ++ return false; ++ if (type->e.numeric.type != HLSL_TYPE_FLOAT) ++ return false; ++ btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); + - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); - if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, arg_cast); -- ++ mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, arg2, arg1); ++ neg1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul1, &instr->loc); + - operands[i] = arg_cast; - } -- ++ ge = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_GEQUAL, mul1, neg1); ++ ge->data_type = btype; + - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); - if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, float_expr); -- ++ neg2 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg2, &instr->loc); ++ cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2); + - if (!(ret = hlsl_new_cast(ctx, float_expr, instr->data_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, ret); -- ++ for (i = 0; i < type->e.numeric.dimx; ++i) ++ one_value.u[i].f = 1.0f; ++ if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); + - return true; - } - default: - return false; - } --} -- ++ div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, one, cond); ++ mul2 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, div, arg1); ++ frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, mul2, &instr->loc); ++ hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, frc, cond); ++ return true; + } + static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -@@ -4541,9 +4106,7 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -4541,9 +5034,7 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, operands[1] = zero; cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->e.numeric.dimx, arg_type->e.numeric.dimy); @@ -16006,7 +20367,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) return false; -@@ -4554,12 +4117,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -4554,12 +5045,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, count = hlsl_type_component_count(cmp_type); for (i = 0; i < count; ++i) { @@ -16021,7 +20382,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } list_move_tail(&instr->entry, &block.instrs); -@@ -4588,17 +4147,9 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v +@@ -4588,17 +5075,9 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v hlsl_block_init(&block); @@ -16042,7 +20403,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 list_move_tail(&instr->entry, &block.instrs); hlsl_src_remove(&jump->condition); -@@ -4634,6 +4185,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -4634,6 +5113,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) struct hlsl_ir_store *store = hlsl_ir_store(instr); struct hlsl_ir_var *var = store->lhs.var; @@ -16052,7 +20413,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (var->last_read < instr->index) { list_remove(&instr->entry); -@@ -4938,20 +4492,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -4938,20 +5420,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop } } @@ -16077,7 +20438,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) { -@@ -4959,12 +4508,6 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl +@@ -4959,12 +5436,6 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl var->first_write = var->last_read = 0; } @@ -16090,7 +20451,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 compute_liveness_recurse(&entry_func->body, 0, 0); } -@@ -5001,7 +4544,7 @@ struct register_allocator +@@ -5001,7 +5472,7 @@ struct register_allocator /* Indexable temps are allocated separately and always keep their index regardless of their * lifetime. */ @@ -16099,15 +20460,148 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 /* Total number of registers allocated so far. Used to declare sm4 temp count. */ uint32_t reg_count; -@@ -5553,6 +5096,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - f = value->i; +@@ -5269,8 +5740,7 @@ static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) + struct hlsl_type *type; + unsigned int index; + +- if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) +- return; ++ hlsl_regset_index_from_deref(ctx, deref, regset, &index); + + if (regset <= HLSL_REGSET_LAST_OBJECT) + { +@@ -5281,7 +5751,6 @@ static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) + { + type = hlsl_deref_get_type(ctx, deref); + +- hlsl_regset_index_from_deref(ctx, deref, regset, &index); + required_bind_count = align(index + type->reg_size[regset], 4) / 4; + var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); + } +@@ -5481,6 +5950,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + } + } + ++static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int count, struct hlsl_reg *ret) ++{ ++ struct hlsl_constant_defs *defs = &ctx->constant_defs; ++ ++ for (size_t i = 0; i < defs->count; ++i) ++ { ++ const struct hlsl_constant_register *reg = &defs->regs[i]; ++ ++ for (size_t j = 0; j <= 4 - count; ++j) ++ { ++ unsigned int writemask = ((1u << count) - 1) << j; ++ ++ if ((reg->allocated_mask & writemask) == writemask ++ && !memcmp(f, ®->value.f[j], count * sizeof(float))) ++ { ++ ret->id = reg->index; ++ ret->allocation_size = 1; ++ ret->writemask = writemask; ++ ret->allocated = true; ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ + static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f, + const struct vkd3d_shader_location *loc) + { +@@ -5494,6 +5990,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, + if (reg->index == (component_index / 4)) + { + reg->value.f[component_index % 4] = f; ++ reg->allocated_mask |= (1u << (component_index % 4)); + return; + } + } +@@ -5504,6 +6001,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, + memset(reg, 0, sizeof(*reg)); + reg->index = component_index / 4; + reg->value.f[component_index % 4] = f; ++ reg->allocated_mask = (1u << (component_index % 4)); + reg->loc = *loc; + } + +@@ -5520,49 +6018,57 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + const struct hlsl_type *type = instr->data_type; +- unsigned int x, i; +- +- constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); +- TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); ++ float f[4] = {0}; + + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + VKD3D_ASSERT(type->e.numeric.dimy == 1); +- VKD3D_ASSERT(constant->reg.writemask); + +- for (x = 0, i = 0; x < 4; ++x) ++ for (unsigned int i = 0; i < type->e.numeric.dimx; ++i) + { + const union hlsl_constant_value_component *value; +- float f = 0; + +- if (!(constant->reg.writemask & (1u << x))) +- continue; +- value = &constant->value.u[i++]; ++ value = &constant->value.u[i]; + + switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: +- f = !!value->u; ++ f[i] = !!value->u; + break; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- f = value->f; ++ f[i] = value->f; + break; + + case HLSL_TYPE_INT: +- f = value->i; ++ f[i] = value->i; break; + case HLSL_TYPE_MIN16UINT: case HLSL_TYPE_UINT: - f = value->u; +- f = value->u; ++ f[i] = value->u; break; -@@ -5765,15 +5309,12 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; + } ++ } ++ ++ if (find_constant(ctx, f, type->e.numeric.dimx, &constant->reg)) ++ { ++ TRACE("Reusing already allocated constant %s for @%u.\n", ++ debug_register('c', constant->reg, type), instr->index); ++ break; ++ } ++ ++ constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); ++ TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + +- record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); ++ for (unsigned int x = 0, i = 0; x < 4; ++x) ++ { ++ if ((constant->reg.writemask & (1u << x))) ++ record_constant(ctx, constant->reg.id * 4 + x, f[i++], &constant->node.loc); + } + + break; +@@ -5765,15 +6271,12 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun /* ps_1_* outputs are special and go in temp register 0. */ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) { @@ -16125,7 +20619,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 break; } } -@@ -5782,6 +5323,13 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun +@@ -5782,6 +6285,13 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); vkd3d_free(allocator.allocations); @@ -16139,11 +20633,11 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return allocator.reg_count; } -@@ -5803,6 +5351,11 @@ static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hl +@@ -5803,6 +6313,11 @@ static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hl {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID}, }; -+ if (hlsl_type_is_patch_array(type)) ++ if (hlsl_type_is_primitive_array(type)) + type = type->e.array.type; + + VKD3D_ASSERT(hlsl_is_numeric_type(type)); @@ -16151,32 +20645,560 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT) return VKD3DSIM_CONSTANT; -@@ -7016,6 +6569,24 @@ void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) - lower_ir(ctx, lower_index_loads, body); +@@ -5829,7 +6344,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + [VKD3D_SHADER_TYPE_COMPUTE] = "Compute", + }; + +- bool is_patch = hlsl_type_is_patch_array(var->data_type); ++ bool is_primitive = hlsl_type_is_primitive_array(var->data_type); + enum vkd3d_shader_register_type type; + struct vkd3d_shader_version version; + bool special_interpolation = false; +@@ -5870,7 +6385,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + bool has_idx; + + if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ctx->domain, +- var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch)) ++ var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -5903,7 +6418,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + } + else + { +- unsigned int component_count = is_patch ++ unsigned int component_count = is_primitive + ? var->data_type->e.array.type->e.numeric.dimx : var->data_type->e.numeric.dimx; + int mode = (ctx->profile->major_version < 4) + ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); +@@ -5922,7 +6437,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { +- struct register_allocator in_patch_allocator = {0}, patch_constant_out_patch_allocator = {0}; ++ struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0}; + struct register_allocator input_allocator = {0}, output_allocator = {0}; + bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; + bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; +@@ -5935,7 +6450,7 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + { + if (var->is_input_semantic) + { +- if (hlsl_type_is_patch_array(var->data_type)) ++ if (hlsl_type_is_primitive_array(var->data_type)) + { + bool is_patch_constant_output_patch = ctx->is_patch_constant_func && + var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT; +@@ -5944,7 +6459,7 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + allocate_semantic_register(ctx, var, &patch_constant_out_patch_allocator, false, + !is_vertex_shader); + else +- allocate_semantic_register(ctx, var, &in_patch_allocator, false, ++ allocate_semantic_register(ctx, var, &in_prim_allocator, false, + !is_vertex_shader); + } + else +@@ -6360,7 +6875,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + unsigned int *start, unsigned int *count) + { + struct hlsl_type *type = deref->var->data_type; +- unsigned int i, k; ++ unsigned int i; + + *start = 0; + *count = 0; +@@ -6368,49 +6883,18 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; +- unsigned int idx = 0; ++ unsigned int index; + + VKD3D_ASSERT(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ +- VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR +- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); +- +- idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + +- switch (type->class) +- { +- case HLSL_CLASS_VECTOR: +- if (idx >= type->e.numeric.dimx) +- return false; +- *start += idx; +- break; +- +- case HLSL_CLASS_MATRIX: +- if (idx >= hlsl_type_major_size(type)) +- return false; +- if (hlsl_type_is_row_major(type)) +- *start += idx * type->e.numeric.dimx; +- else +- *start += idx * type->e.numeric.dimy; +- break; +- +- case HLSL_CLASS_ARRAY: +- if (idx >= type->e.array.elements_count) +- return false; +- *start += idx * hlsl_type_component_count(type->e.array.type); +- break; +- +- case HLSL_CLASS_STRUCT: +- for (k = 0; k < idx; ++k) +- *start += hlsl_type_component_count(type->e.record.fields[k].type); +- break; +- +- default: +- vkd3d_unreachable(); +- } ++ if (!component_index_from_deref_path_node(path_node, type, &index)) ++ return false; ++ *start += index; + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } +@@ -6439,8 +6923,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref + if (path_node->type == HLSL_IR_CONSTANT) + { + /* We should always have generated a cast to UINT. */ +- VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR +- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + +@@ -6502,14 +6985,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref + + *offset = deref->const_offset; + +- if (hlsl_type_is_patch_array(deref->var->data_type)) ++ if (hlsl_type_is_primitive_array(deref->var->data_type)) + return false; + + if (offset_node) + { + /* We should always have generated a cast to UINT. */ +- VKD3D_ASSERT(offset_node->data_type->class == HLSL_CLASS_SCALAR +- && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(hlsl_is_vec1(offset_node->data_type) && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT); + return false; + } +@@ -6544,11 +7026,14 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + { + const struct hlsl_ir_var *var = deref->var; + struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ unsigned int offset = 0; + + VKD3D_ASSERT(deref->data_type); + VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); + ++ if (!hlsl_type_is_primitive_array(deref->var->data_type)) ++ offset = hlsl_offset_from_deref_safe(ctx, deref); ++ + ret.index += offset / 4; + ret.id += offset / 4; + +@@ -6559,6 +7044,36 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + return ret; } -+static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) ++static bool get_integral_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, ++ unsigned int i, enum hlsl_base_type *base_type, int *value) +{ ++ const struct hlsl_ir_node *instr = attr->args[i].node; ++ const struct hlsl_type *type = instr->data_type; ++ ++ if (type->class != HLSL_CLASS_SCALAR ++ || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, type))) ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Unexpected type for argument %u of [%s]: expected int or uint, but got %s.", ++ i, attr->name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (instr->type != HLSL_IR_CONSTANT) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [%s] initializer.", attr->name); ++ return false; ++ } ++ ++ *base_type = type->e.numeric.type; ++ *value = hlsl_ir_constant(instr)->value.u[0].i; ++ return true; ++} ++ + static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i) + { + const struct hlsl_ir_node *instr = attr->args[i].node; +@@ -6594,36 +7109,17 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + + for (i = 0; i < attr->args_count; ++i) + { +- const struct hlsl_ir_node *instr = attr->args[i].node; +- const struct hlsl_type *type = instr->data_type; +- const struct hlsl_ir_constant *constant; +- +- if (type->class != HLSL_CLASS_SCALAR +- || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, type))) +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument %u of [numthreads]: expected int or uint, but got %s.", +- i, string->buffer); +- hlsl_release_string_buffer(ctx, string); +- break; +- } ++ enum hlsl_base_type base_type; ++ int value; + +- if (instr->type != HLSL_IR_CONSTANT) +- { +- hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [numthreads] initializer."); +- break; +- } +- constant = hlsl_ir_constant(instr); ++ if (!get_integral_argument_value(ctx, attr, i, &base_type, &value)) ++ return; + +- if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) +- || (type->e.numeric.type == HLSL_TYPE_UINT && !constant->value.u[0].u)) +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, ++ if ((base_type == HLSL_TYPE_INT && value <= 0) || (base_type == HLSL_TYPE_UINT && !value)) ++ hlsl_error(ctx, &attr->args[i].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, + "Thread count must be a positive integer."); + +- ctx->thread_count[i] = constant->value.u[0].u; ++ ctx->thread_count[i] = value; + } + } + +@@ -6655,9 +7151,8 @@ static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attri + + static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) + { +- const struct hlsl_ir_node *instr; +- const struct hlsl_type *type; +- const struct hlsl_ir_constant *constant; ++ enum hlsl_base_type base_type; ++ int value; + + if (attr->args_count != 1) + { +@@ -6666,35 +7161,14 @@ static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const stru + return; + } + +- instr = attr->args[0].node; +- type = instr->data_type; +- +- if (type->class != HLSL_CLASS_SCALAR +- || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, type))) +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.", +- string->buffer); +- hlsl_release_string_buffer(ctx, string); +- return; +- } +- +- if (instr->type != HLSL_IR_CONSTANT) +- { +- hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer."); ++ if (!get_integral_argument_value(ctx, attr, 0, &base_type, &value)) + return; +- } +- constant = hlsl_ir_constant(instr); + +- if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0) +- || constant->value.u[0].u > 32) +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, ++ if (value < 0 || value > 32) ++ hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, + "Output control point count must be between 0 and 32."); + +- ctx->output_control_point_count = constant->value.u[0].u; ++ ctx->output_control_point_count = value; + } + + static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +@@ -6788,6 +7262,28 @@ static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct + "Patch constant function \"%s\" is not defined.", name); + } + ++static void parse_maxvertexcount_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) ++{ ++ enum hlsl_base_type base_type; ++ int value; ++ ++ if (attr->args_count != 1) ++ { ++ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected 1 parameter for [maxvertexcount] attribute, but got %u.", attr->args_count); ++ return; ++ } ++ ++ if (!get_integral_argument_value(ctx, attr, 0, &base_type, &value)) ++ return; ++ ++ if (value < 1 || value > 1024) ++ hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT, ++ "Max vertex count must be between 1 and 1024."); ++ ++ ctx->max_vertex_count = value; ++} ++ + static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + const struct hlsl_profile_info *profile = ctx->profile; +@@ -6812,6 +7308,8 @@ static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir + parse_patchconstantfunc_attribute(ctx, attr); + else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL) + entry_func->early_depth_test = true; ++ else if (!strcmp(attr->name, "maxvertexcount") && profile->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ parse_maxvertexcount_attribute(ctx, attr); + else + hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, + "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); +@@ -6884,7 +7382,71 @@ static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct h + } + } + +-static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) ++static enum vkd3d_primitive_type get_primitive_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) ++{ ++ uint32_t prim_modifier = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK; ++ enum vkd3d_primitive_type prim_type = VKD3D_PT_UNDEFINED; ++ ++ if (prim_modifier) ++ { ++ unsigned int count = var->data_type->e.array.elements_count; ++ unsigned int expected_count; ++ ++ VKD3D_ASSERT(!(prim_modifier & (prim_modifier - 1))); ++ ++ switch (prim_modifier) ++ { ++ case HLSL_PRIMITIVE_POINT: ++ prim_type = VKD3D_PT_POINTLIST; ++ expected_count = 1; ++ break; ++ ++ case HLSL_PRIMITIVE_LINE: ++ prim_type = VKD3D_PT_LINELIST; ++ expected_count = 2; ++ break; ++ ++ case HLSL_PRIMITIVE_TRIANGLE: ++ prim_type = VKD3D_PT_TRIANGLELIST; ++ expected_count = 3; ++ break; ++ ++ case HLSL_PRIMITIVE_LINEADJ: ++ prim_type = VKD3D_PT_LINELIST_ADJ; ++ expected_count = 4; ++ break; ++ ++ case HLSL_PRIMITIVE_TRIANGLEADJ: ++ prim_type = VKD3D_PT_TRIANGLELIST_ADJ; ++ expected_count = 6; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (count != expected_count) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_modifiers_to_string(ctx, prim_modifier))) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, ++ "Control point count %u does not match the expect count %u for the %s input primitive type.", ++ count, expected_count, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ } ++ ++ /* Patch types take precedence over primitive modifiers. */ ++ if (hlsl_type_is_patch_array(var->data_type)) ++ prim_type = VKD3D_PT_PATCH; ++ ++ VKD3D_ASSERT(prim_type != VKD3D_PT_UNDEFINED); ++ return prim_type; ++} ++ ++ ++static void validate_and_record_prim_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) + { + unsigned int control_point_count = var->data_type->e.array.elements_count; + enum hlsl_array_type array_type = var->data_type->e.array.array_type; +@@ -6902,7 +7464,7 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ + return; + } + } +- else ++ else if (array_type == HLSL_ARRAY_PATCH_OUTPUT) + { + if (!ctx->is_patch_constant_func && profile->type != VKD3D_SHADER_TYPE_DOMAIN) + { +@@ -6913,6 +7475,30 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ + } + } + ++ if ((var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK) && profile->type != VKD3D_SHADER_TYPE_GEOMETRY) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Input primitive parameters can only be used in geometry shaders."); ++ return; ++ } ++ ++ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ { ++ enum vkd3d_primitive_type prim_type = get_primitive_type(ctx, var); ++ ++ if (ctx->input_primitive_type == VKD3D_PT_UNDEFINED) ++ { ++ ctx->input_primitive_type = prim_type; ++ } ++ else if (ctx->input_primitive_type != prim_type) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Input primitive type does not match the previously declared type."); ++ hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR, ++ "The input primitive was previously declared here."); ++ } ++ } ++ + if (control_point_count > 32) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, +@@ -6925,7 +7511,7 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ + { + if (control_point_count != ctx->output_control_point_count) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, +- "Output control point count %u does not match the count %u specified in the control point function.", ++ "Output control point count %u does not match the count %u declared in the control point function.", + control_point_count, ctx->output_control_point_count); + + if (!hlsl_types_are_equal(control_point_type, ctx->output_control_point_type)) +@@ -6937,22 +7523,32 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ + + if (ctx->input_control_point_count != UINT_MAX) + { +- VKD3D_ASSERT(ctx->is_patch_constant_func); ++ VKD3D_ASSERT(profile->type == VKD3D_SHADER_TYPE_GEOMETRY || ctx->is_patch_constant_func); + + if (control_point_count != ctx->input_control_point_count) ++ { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, +- "Input control point count %u does not match the count %u specified in the control point function.", ++ "Input control point count %u does not match the count %u declared previously.", + control_point_count, ctx->input_control_point_count); ++ hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR, ++ "The input primitive was previously declared here."); ++ } + +- if (!hlsl_types_are_equal(control_point_type, ctx->input_control_point_type)) ++ if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY ++ && !hlsl_types_are_equal(control_point_type, ctx->input_control_point_type)) ++ { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Input control point type does not match the input type specified in the control point function."); ++ "Input control point type does not match the input type declared previously."); ++ hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR, ++ "The input primitive was previously declared here."); ++ } + + return; + } + + ctx->input_control_point_count = control_point_count; + ctx->input_control_point_type = control_point_type; ++ ctx->input_primitive_param = var; + } + + static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) +@@ -7007,13 +7603,47 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod + list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs)); + hlsl_block_cleanup(&block); + +- break; +- } ++ break; ++ } ++} ++ ++void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) ++{ ++ lower_ir(ctx, lower_index_loads, body); ++} ++ ++ ++static bool simplify_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ bool progress, any_progress = false; ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, block, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); ++ ++ any_progress |= progress; ++ } while (progress); ++ ++ return any_progress; + } + +-void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) ++static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + { +- lower_ir(ctx, lower_index_loads, body); + bool progress; + + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + do + { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); ++ progress = simplify_exprs(ctx, body); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + } while (progress); -+} -+ ++ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + } + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - bool progress; -@@ -7040,19 +6611,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +@@ -7035,32 +7665,24 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + + lower_ir(ctx, lower_narrowing_casts, body); + lower_ir(ctx, lower_int_dot, body); +- lower_ir(ctx, lower_int_division, body); +- lower_ir(ctx, lower_int_modulus, body); ++ if (hlsl_version_ge(ctx, 4, 0)) ++ { ++ lower_ir(ctx, lower_int_modulus_sm4, body); ++ lower_ir(ctx, lower_int_division_sm4, body); ++ } lower_ir(ctx, lower_int_abs, body); lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_float_modulus, body); @@ -16197,7 +21219,25 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -7109,6 +6669,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + struct shader_signature *signature, bool output, struct hlsl_ir_var *var) + { + enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID; ++ bool is_primitive = hlsl_type_is_primitive_array(var->data_type); + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; +- bool is_patch = hlsl_type_is_patch_array(var->data_type); + unsigned int register_index, mask, use_mask; + const char *name = var->semantic.name; + enum vkd3d_shader_register_type type; +@@ -7073,7 +7695,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + bool has_idx, ret; + + ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, +- ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch); ++ ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive); + VKD3D_ASSERT(ret); + if (sysval == ~0u) + return; +@@ -7109,6 +7731,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog break; case HLSL_TYPE_BOOL: @@ -16205,7 +21245,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: component_type = VKD3D_SHADER_COMPONENT_UINT; break; -@@ -7198,6 +6759,22 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog +@@ -7198,6 +7821,22 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog element->used_mask = use_mask; if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) element->interpolation_mode = VKD3DSIM_LINEAR; @@ -16228,7 +21268,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } static void generate_vsir_signature(struct hlsl_ctx *ctx, -@@ -7265,6 +6842,7 @@ static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, +@@ -7265,6 +7904,7 @@ static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, return VKD3D_DATA_INT; case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: @@ -16236,16 +21276,158 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return VKD3D_DATA_UINT; } } -@@ -7929,7 +7507,7 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi +@@ -7416,7 +8056,7 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, + static enum vkd3d_shader_register_type sm4_get_semantic_register_type(enum vkd3d_shader_type shader_type, + bool is_patch_constant_func, const struct hlsl_ir_var *var) + { +- if (hlsl_type_is_patch_array(var->data_type)) ++ if (hlsl_type_is_primitive_array(var->data_type)) + { + VKD3D_ASSERT(var->is_input_semantic); + +@@ -7636,7 +8276,7 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p + } + else + { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; ++ unsigned int offset = deref->const_offset + var->buffer_offset; + + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; +@@ -7654,19 +8294,27 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } ++ ++ if (deref->rel_offset.node) ++ { ++ if (!(reg->idx[reg->idx_count - 1].rel_addr = sm4_generate_vsir_new_idx_src(ctx, ++ program, deref->rel_offset.node))) ++ return false; ++ } ++ + *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { +- bool is_patch = hlsl_type_is_patch_array(var->data_type); ++ bool is_primitive = hlsl_type_is_primitive_array(var->data_type); + bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +- VKD3D_ASSERT(!is_patch); ++ VKD3D_ASSERT(!is_primitive); + + if (has_idx) + { +@@ -7688,12 +8336,12 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p + + reg->type = sm4_get_semantic_register_type(version->type, ctx->is_patch_constant_func, var); + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[is_patch ? 1 : 0].offset = hlsl_reg.id; +- reg->idx_count = is_patch ? 2 : 1; ++ reg->idx[is_primitive ? 1 : 0].offset = hlsl_reg.id; ++ reg->idx_count = is_primitive ? 2 : 1; + *writemask = hlsl_reg.writemask; + } + +- if (is_patch) ++ if (is_primitive) + { + reg->idx[0].offset = deref->const_offset / 4; + if (deref->rel_offset.node) +@@ -7750,7 +8398,8 @@ static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, st + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) + return false; +- src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ if (src_param->reg.dimension != VSIR_DIMENSION_NONE) ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + return true; + } + +@@ -7780,7 +8429,6 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) + { + struct hlsl_ir_node *instr = &constant->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + +@@ -7792,13 +8440,11 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->reg.idx[0].offset = constant->reg.id; + src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + } + + static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, +@@ -7885,11 +8531,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + dst_param->write_mask = 1u << i; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + c = vsir_swizzle_get_component(src_swizzle, i); + src_param->swizzle = vsir_swizzle_from_writemask(1u << c); + } +@@ -7901,7 +8549,6 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi + { + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int src_count = 0; +@@ -7912,25 +8559,20 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; +- +- src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = operand->reg.id; +- src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, VKD3DSP_WRITEMASK_ALL); + + if (ctx->profile->major_version < 3) + { + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - src_param = &ins->src[1]; + src_param = &ins->src[2]; vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -@@ -7957,6 +7535,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + } +@@ -7957,6 +8599,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, switch (src_type->e.numeric.type) { case HLSL_TYPE_INT: @@ -16253,7 +21435,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: /* Integrals are internally represented as floats, so no change is necessary.*/ -@@ -7978,8 +7557,9 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7978,8 +8621,9 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, break; case HLSL_TYPE_INT: @@ -16264,7 +21446,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: -@@ -7989,6 +7569,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -7989,6 +8633,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, break; case HLSL_TYPE_INT: @@ -16272,7 +21454,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -@@ -8004,13 +7585,8 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -8004,13 +8649,8 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, switch (src_type->e.numeric.type) { case HLSL_TYPE_FLOAT: @@ -16288,7 +21470,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 break; default: -@@ -8033,12 +7609,15 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr +@@ -8033,12 +8673,15 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr struct hlsl_ir_expr *expr) { struct hlsl_ir_node *instr = &expr->node; @@ -16308,7 +21490,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return false; } -@@ -8053,30 +7632,44 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr +@@ -8053,30 +8696,44 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr case HLSL_OP1_COS_REDUCED: VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0); @@ -16353,7 +21535,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP); break; -@@ -8085,23 +7678,33 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr +@@ -8085,23 +8742,33 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_RSQ: @@ -16387,7 +21569,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 switch (expr->operands[0].node->data_type->e.numeric.dimx) { case 3: -@@ -8135,35 +7738,49 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr +@@ -8135,35 +8802,49 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP2_LOGIC_AND: @@ -16440,7 +21622,230 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, -@@ -8650,6 +8267,10 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_comb +@@ -8213,19 +8894,68 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, + else + VKD3D_ASSERT(reg.allocated); + +- vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ if (type == VKD3DSPR_DEPTHOUT) ++ { ++ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); ++ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; ++ } ++ else ++ { ++ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ dst_param->reg.idx[0].offset = register_index; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ } + dst_param->write_mask = writemask; +- dst_param->reg.idx[0].offset = register_index; + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); + } + ++static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_node *instr) ++{ ++ enum vkd3d_shader_opcode opcode = hlsl_version_ge(ctx, 2, 0) ? VKD3DSIH_MOVA : VKD3DSIH_MOV; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(instr->reg.allocated); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) ++ return; ++ ++ dst_param = &ins->dst[0]; ++ vsir_register_init(&dst_param->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); ++ dst_param->write_mask = VKD3DSP_WRITEMASK_0; ++ ++ VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(instr->data_type->e.numeric.dimx == 1); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, instr, VKD3DSP_WRITEMASK_ALL); ++} ++ ++static struct vkd3d_shader_src_param *sm1_generate_vsir_new_address_src(struct hlsl_ctx *ctx, ++ struct vsir_program *program) ++{ ++ struct vkd3d_shader_src_param *idx_src; ++ ++ if (!(idx_src = vsir_program_get_src_params(program, 1))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return NULL; ++ } ++ ++ memset(idx_src, 0, sizeof(*idx_src)); ++ vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); ++ idx_src->reg.dimension = VSIR_DIMENSION_VEC4; ++ idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ return idx_src; ++} ++ + static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, +- struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, +- unsigned int dst_writemask, const struct vkd3d_shader_location *loc) ++ struct vsir_program *program, struct vkd3d_shader_src_param *src_param, ++ struct hlsl_deref *deref, uint32_t dst_writemask, const struct vkd3d_shader_location *loc) + { + enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; ++ struct vkd3d_shader_src_param *src_rel_addr = NULL; + struct vkd3d_shader_version version; + uint32_t register_index; + unsigned int writemask; +@@ -8243,12 +8973,26 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + } + else if (deref->var->is_uniform) + { ++ unsigned int offset = deref->const_offset; ++ + type = VKD3DSPR_CONST; ++ register_index = deref->var->regs[HLSL_REGSET_NUMERIC].id + offset / 4; + +- reg = hlsl_reg_from_deref(ctx, deref); +- register_index = reg.id; +- writemask = reg.writemask; +- VKD3D_ASSERT(reg.allocated); ++ writemask = 0xf & (0xf << (offset % 4)); ++ if (deref->var->regs[HLSL_REGSET_NUMERIC].writemask) ++ writemask = hlsl_combine_writemasks(deref->var->regs[HLSL_REGSET_NUMERIC].writemask, writemask); ++ ++ if (deref->rel_offset.node) ++ { ++ VKD3D_ASSERT(deref_supports_sm1_indirect_addressing(ctx, deref)); ++ ++ if (!(src_rel_addr = sm1_generate_vsir_new_address_src(ctx, program))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ } ++ VKD3D_ASSERT(deref->var->regs[HLSL_REGSET_NUMERIC].allocated); + } + else if (deref->var->is_input_semantic) + { +@@ -8280,32 +9024,30 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + } + + vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->reg.idx[0].offset = register_index; ++ src_param->reg.idx[0].rel_addr = src_rel_addr; + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); +- +- if (deref->rel_offset.node) +- hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); + } + + static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_load *load) + { + struct hlsl_ir_node *instr = &load->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + ++ if (load->src.rel_offset.node) ++ sm1_generate_vsir_instr_mova(ctx, program, load->src.rel_offset.node); ++ + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + +- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask, +- &ins->location); ++ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0], ++ &load->src, ins->dst[0].write_mask, &ins->location); + } + + static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, +@@ -8315,7 +9057,6 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; + struct hlsl_ir_node *instr = &load->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; +@@ -8354,15 +9095,12 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + return; + ins->flags = flags; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + src_param = &ins->src[0]; + vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); + +- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, ++ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], &load->resource, + VKD3DSP_WRITEMASK_ALL, &ins->location); + + if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) +@@ -8379,7 +9117,6 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) + { + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; +@@ -8389,11 +9126,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->reg.dimension = VSIR_DIMENSION_VEC4; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); + swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); +@@ -8429,7 +9162,6 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *condition = jump->condition.node; + struct hlsl_ir_node *instr = &jump->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) +@@ -8437,10 +9169,7 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = condition->reg.id; +- dst_param->write_mask = condition->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, condition); + } + else + { +@@ -8561,6 +9290,10 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + return; + } + ++ program->temp_count = allocate_temp_registers(ctx, entry_func); ++ if (ctx->result) ++ return; ++ + generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); +@@ -8650,6 +9383,10 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_comb case HLSL_TYPE_INT: case HLSL_TYPE_UINT: return D3DXPT_INT; @@ -16451,7 +21856,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } break; -@@ -8934,6 +8555,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe +@@ -8934,6 +9671,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe uni.f = var->default_values[k].number.i; break; @@ -16459,7 +21864,71 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: uni.f = var->default_values[k].number.u; -@@ -9127,13 +8749,6 @@ static bool type_is_float(const struct hlsl_type *type) +@@ -8977,7 +9715,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + const struct hlsl_ir_var *var, struct hlsl_block *block, const struct vkd3d_shader_location *loc) + { + const struct vkd3d_shader_version *version = &program->shader_version; +- const bool is_patch = hlsl_type_is_patch_array(var->data_type); ++ const bool is_primitive = hlsl_type_is_primitive_array(var->data_type); + const bool output = var->is_output_semantic; + enum vkd3d_shader_sysval_semantic semantic; + struct vkd3d_shader_dst_param *dst_param; +@@ -8989,7 +9727,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + bool has_idx; + + sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ctx->domain, +- var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch); ++ var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive); + if (semantic == ~0u) + semantic = VKD3D_SHADER_SV_NONE; + +@@ -9002,9 +9740,17 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; + break; + ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) ++ opcode = VKD3DSIH_DCL_INPUT_PS_SGV; ++ else if (version->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ opcode = VKD3DSIH_DCL_INPUT; ++ else ++ opcode = VKD3DSIH_DCL_INPUT_SGV; ++ break; ++ + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: +- case VKD3D_SHADER_SV_PRIMITIVE_ID: + case VKD3D_SHADER_SV_SAMPLE_INDEX: + case VKD3D_SHADER_SV_VERTEX_ID: + opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) +@@ -9014,7 +9760,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + default: + if (version->type == VKD3D_SHADER_TYPE_PIXEL) + opcode = VKD3DSIH_DCL_INPUT_PS_SIV; +- else if (is_patch) ++ else if (is_primitive && version->type != VKD3D_SHADER_TYPE_GEOMETRY) + opcode = VKD3DSIH_DCL_INPUT; + else + opcode = VKD3DSIH_DCL_INPUT_SIV; +@@ -9055,7 +9801,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + } + else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) + { +- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || is_patch); ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || is_primitive || version->type == VKD3D_SHADER_TYPE_GEOMETRY); + dst_param = &ins->declaration.dst; + } + else +@@ -9066,7 +9812,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + dst_param = &ins->declaration.register_semantic.reg; + } + +- if (is_patch) ++ if (is_primitive) + { + VKD3D_ASSERT(has_idx); + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 2); +@@ -9127,13 +9873,6 @@ static bool type_is_float(const struct hlsl_type *type) return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; } @@ -16473,7 +21942,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program, const struct hlsl_ir_expr *expr, uint32_t bits) { -@@ -9188,6 +8803,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -9188,6 +9927,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true); return true; @@ -16481,7 +21950,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true); return true; -@@ -9211,6 +8827,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -9211,6 +9951,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_INT: @@ -16489,7 +21958,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; -@@ -9225,6 +8842,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -9225,6 +9966,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, } break; @@ -16497,7 +21966,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: switch (src_type->e.numeric.type) { -@@ -9234,6 +8852,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, +@@ -9234,6 +9976,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_INT: @@ -16505,7 +21974,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; -@@ -9339,7 +8958,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9339,7 +10082,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_OP1_BIT_NOT: @@ -16514,7 +21983,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); return true; -@@ -9431,6 +9050,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9431,6 +10174,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_INT: @@ -16522,7 +21991,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true); return true; -@@ -9498,6 +9118,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9498,6 +10242,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_INT: @@ -16530,7 +21999,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true); return true; -@@ -9508,17 +9129,17 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9508,17 +10253,17 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } case HLSL_OP2_BIT_AND: @@ -16551,7 +22020,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true); return true; -@@ -9529,6 +9150,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9529,6 +10274,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true); return true; @@ -16559,7 +22028,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0); return true; -@@ -9577,6 +9199,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9577,6 +10323,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: @@ -16567,7 +22036,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true); return true; -@@ -9601,6 +9224,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9601,6 +10348,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_BOOL: @@ -16575,7 +22044,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true); return true; -@@ -9625,6 +9249,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9625,6 +10373,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_BOOL: @@ -16583,7 +22052,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true); return true; -@@ -9646,7 +9271,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9646,7 +10395,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_OP2_LSHIFT: @@ -16592,7 +22061,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true); return true; -@@ -9659,6 +9284,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9659,6 +10408,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_INT: @@ -16600,7 +22069,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true); return true; -@@ -9679,6 +9305,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9679,6 +10429,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true); return true; @@ -16608,7 +22077,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true); return true; -@@ -9699,6 +9326,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9699,6 +10450,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true); return true; @@ -16616,7 +22085,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true); return true; -@@ -9711,6 +9339,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9711,6 +10463,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, case HLSL_OP2_MOD: switch (dst_type->e.numeric.type) { @@ -16624,7 +22093,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1); return true; -@@ -9728,6 +9357,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9728,6 +10481,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, return true; case HLSL_TYPE_INT: @@ -16632,7 +22101,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: /* Using IMUL instead of UMUL because we're taking the low * bits, and the native compiler generates IMUL. */ -@@ -9750,6 +9380,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9750,6 +10504,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: @@ -16640,7 +22109,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true); return true; -@@ -9761,7 +9392,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -9761,7 +10516,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } case HLSL_OP2_RSHIFT: @@ -16649,7 +22118,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); generate_vsir_instr_expr_single_instr_op(ctx, program, expr, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true); -@@ -10840,12 +10471,32 @@ static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vs +@@ -10840,12 +11595,32 @@ static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vs * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ } @@ -16682,7 +22151,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -@@ -10866,6 +10517,25 @@ static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, +@@ -10866,6 +11641,25 @@ static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, sm4_free_extern_resources(extern_resources, extern_resources_count); @@ -16690,7 +22159,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 + { + const struct hlsl_type *type = var->data_type; + -+ if (hlsl_type_is_patch_array(type)) ++ if (hlsl_type_is_primitive_array(type)) + type = var->data_type->e.array.type; + + /* Note that it doesn't matter if the semantic is unused or doesn't @@ -16708,7 +22177,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; } -@@ -10994,6 +10664,7 @@ static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_ +@@ -10994,6 +11788,7 @@ static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_ return VKD3D_DATA_INT; case HLSL_TYPE_BOOL: @@ -16716,7 +22185,21 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: return VKD3D_DATA_UINT; } -@@ -11288,6 +10959,7 @@ static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type) +@@ -11153,6 +11948,13 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + ? 0 : ctx->input_control_point_count; + program->tess_domain = ctx->domain; + } ++ else if (version.type == VKD3D_SHADER_TYPE_GEOMETRY) ++ { ++ program->input_control_point_count = ctx->input_control_point_count; ++ program->input_primitive = ctx->input_primitive_type; ++ program->output_topology = VKD3D_PT_UNDEFINED; /* TODO: obtain from stream output parameters. */ ++ program->vertices_out_count = ctx->max_vertex_count; ++ } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { +@@ -11288,6 +12090,7 @@ static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type) break; case HLSL_TYPE_BOOL: @@ -16724,7 +22207,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 case HLSL_TYPE_UINT: return D3D_RETURN_TYPE_UINT; } -@@ -11373,6 +11045,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +@@ -11373,6 +12176,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) return D3D_SVT_INT; case HLSL_TYPE_UINT: return D3D_SVT_UINT; @@ -16733,7 +22216,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 } vkd3d_unreachable(); -@@ -11696,16 +11370,13 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd +@@ -11696,16 +12501,13 @@ static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rd static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) { @@ -16752,7 +22235,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -11759,19 +11430,13 @@ static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ +@@ -11759,19 +12561,13 @@ static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) { @@ -16775,7 +22258,18 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc))) return NULL; -@@ -12113,10 +11778,9 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -11850,9 +12646,7 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo + copy_propagation_pop_scope(state); + copy_propagation_push_scope(state, ctx); + +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); +- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); +- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); ++ progress = simplify_exprs(ctx, block); + + current_index = index_instructions(block, *index); + progress |= copy_propagation_transform_block(ctx, block, state); +@@ -12113,10 +12907,9 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) { @@ -16787,7 +22281,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; struct hlsl_ir_var *lhs; char *body; -@@ -12179,28 +11843,21 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru +@@ -12179,28 +12972,21 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return false; lhs = func->parameters.vars[0]; @@ -16819,7 +22313,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 struct hlsl_ir_expr *expr; struct hlsl_ir_var *lhs; char *body; -@@ -12251,30 +11908,22 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru +@@ -12251,30 +13037,22 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return false; lhs = func->parameters.vars[0]; @@ -16853,7 +22347,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 const char *template; char *body; -@@ -12327,7 +11976,7 @@ static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct +@@ -12327,7 +13105,7 @@ static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct template = template_sm2; else if (hlsl_version_lt(ctx, 4, 0)) template = template_sm3; @@ -16862,7 +22356,7 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 template = template_int; else template = template_sm4; -@@ -12339,20 +11988,13 @@ static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct +@@ -12339,20 +13117,13 @@ static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct if (!(func = hlsl_compile_internal_function(ctx, "isinf", body))) return false; @@ -16885,22 +22379,183 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 return true; } -@@ -12513,6 +12155,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); +@@ -12366,6 +13137,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, + struct recursive_call_ctx recursive_call_ctx; + struct hlsl_ir_var *var; + unsigned int i; ++ bool progress; + + ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func; + +@@ -12418,41 +13190,61 @@ static void process_entry_function(struct hlsl_ctx *ctx, + else + prepend_uniform_copy(ctx, body, var); + } +- else if (hlsl_type_is_patch_array(var->data_type)) ++ else if (hlsl_type_is_primitive_array(var->data_type)) + { +- if (var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT) ++ if (var->storage_modifiers & HLSL_STORAGE_OUT) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Input primitive parameter \"%s\" is declared as \"out\".", var->name); ++ ++ if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY) + { +- if (input_patch) ++ enum hlsl_array_type array_type = var->data_type->e.array.array_type; ++ ++ if (array_type == HLSL_ARRAY_PATCH_INPUT) + { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, +- "Found multiple InputPatch parameters."); +- hlsl_note(ctx, &input_patch->loc, VKD3D_SHADER_LOG_ERROR, +- "The InputPatch parameter was previously declared here."); +- continue; ++ if (input_patch) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, ++ "Found multiple InputPatch parameters."); ++ hlsl_note(ctx, &input_patch->loc, VKD3D_SHADER_LOG_ERROR, ++ "The InputPatch parameter was previously declared here."); ++ continue; ++ } ++ input_patch = var; + } +- input_patch = var; +- } +- else +- { +- if (output_patch) ++ else if (array_type == HLSL_ARRAY_PATCH_OUTPUT) + { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, +- "Found multiple OutputPatch parameters."); +- hlsl_note(ctx, &output_patch->loc, VKD3D_SHADER_LOG_ERROR, +- "The OutputPatch parameter was previously declared here."); +- continue; ++ if (output_patch) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, ++ "Found multiple OutputPatch parameters."); ++ hlsl_note(ctx, &output_patch->loc, VKD3D_SHADER_LOG_ERROR, ++ "The OutputPatch parameter was previously declared here."); ++ continue; ++ } ++ output_patch = var; + } +- output_patch = var; + } + +- validate_and_record_patch_type(ctx, var); +- if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ validate_and_record_prim_type(ctx, var); ++ prepend_input_var_copy(ctx, entry_func, var); ++ } ++ else if (hlsl_get_stream_output_type(var->data_type)) ++ { ++ if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY) + { +- hlsl_fixme(ctx, &var->loc, "InputPatch/OutputPatch parameters in geometry shaders."); ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Stream output parameters can only be used in geometry shaders."); + continue; + } + +- prepend_input_var_copy(ctx, entry_func, var); ++ if (!(var->storage_modifiers & HLSL_STORAGE_IN) || !(var->storage_modifiers & HLSL_STORAGE_OUT)) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Stream output parameter \"%s\" must be declared as \"inout\".", var->name); ++ ++ /* TODO: check that maxvertexcount * component_count(element_type) <= 1024. */ ++ ++ continue; + } + else + { +@@ -12465,12 +13257,24 @@ static void process_entry_function(struct hlsl_ctx *ctx, + } + + if (var->storage_modifiers & HLSL_STORAGE_IN) ++ { ++ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && !var->semantic.name) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE, ++ "Input parameter \"%s\" is missing a primitive type.", var->name); ++ continue; ++ } ++ + prepend_input_var_copy(ctx, entry_func, var); ++ } + if (var->storage_modifiers & HLSL_STORAGE_OUT) + { + if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Output parameters are not supported in hull shader control point functions."); ++ else if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Output parameters are not allowed in geometry shaders."); + else + append_output_var_copy(ctx, entry_func, var); + } +@@ -12478,7 +13282,11 @@ static void process_entry_function(struct hlsl_ctx *ctx, + } + if (entry_func->return_var) + { +- if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) ++ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Geometry shaders cannot return values."); ++ else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT ++ && !entry_func->return_var->semantic.name) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + +@@ -12493,6 +13301,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, + hlsl_fixme(ctx, &entry_func->loc, "Passthrough hull shader control point function."); + } + ++ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && ctx->input_primitive_type == VKD3D_PT_UNDEFINED) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE, ++ "Entry point \"%s\" is missing an input primitive parameter.", entry_func->func->name); ++ + if (hlsl_version_ge(ctx, 4, 0)) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); +@@ -12503,6 +13315,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, + hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); + } + compute_liveness(ctx, entry_func); + transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); + - if (hlsl_version_lt(ctx, 4, 0)) + loop_unrolling_execute(ctx, body); + hlsl_run_const_passes(ctx, body); + +@@ -12517,6 +13332,17 @@ static void process_entry_function(struct hlsl_ctx *ctx, hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); -@@ -12537,14 +12182,12 @@ static void process_entry_function(struct hlsl_ctx *ctx, + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); ++ ++ do ++ { ++ progress = vectorize_exprs(ctx, body); ++ compute_liveness(ctx, entry_func); ++ progress |= hlsl_transform_ir(ctx, dce, body, NULL); ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress |= vectorize_stores(ctx, body); ++ } while (progress); ++ + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + + if (hlsl_version_ge(ctx, 4, 0)) +@@ -12537,14 +13363,14 @@ static void process_entry_function(struct hlsl_ctx *ctx, while (lower_ir(ctx, lower_nonconstant_array_loads, body)); lower_ir(ctx, lower_ternary, body); - - lower_ir(ctx, lower_nonfloat_exprs, body); ++ lower_ir(ctx, lower_int_modulus_sm1, body); + lower_ir(ctx, lower_division, body); /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); @@ -16908,10 +22563,11 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 lower_ir(ctx, lower_casts_to_int, body); - lower_ir(ctx, lower_division, body); ++ lower_ir(ctx, lower_trunc, body); lower_ir(ctx, lower_sqrt, body); lower_ir(ctx, lower_dot, body); lower_ir(ctx, lower_round, body); -@@ -12566,6 +12209,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -12566,13 +13392,15 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); @@ -16920,8 +22576,34 @@ index 2afd3e1e1e5..edcd9ce62a7 100644 do compute_liveness(ctx, entry_func); while (hlsl_transform_ir(ctx, dce, body, NULL)); + + /* TODO: move forward, remove when no longer needed */ + transform_derefs(ctx, replace_deref_path_with_offset, body); +- while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); ++ simplify_exprs(ctx, body); + transform_derefs(ctx, clean_constant_deref_offset_srcs, body); + + do +@@ -12608,6 +13436,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); ++ else if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && !ctx->max_vertex_count) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, ++ "Entry point \"%s\" is missing a [maxvertexcount] attribute.", entry_func->func->name); + + hlsl_block_init(&global_uniform_block); + +@@ -12633,7 +13464,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + if (profile->major_version < 4) + { + mark_indexable_vars(ctx, entry_func); +- allocate_temp_registers(ctx, entry_func); + allocate_const_registers(ctx, entry_func); + sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 538f0f46854..f4715a9224c 100644 +index 538f0f46854..f74ecffcd4b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -51,6 +51,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, @@ -17175,7 +22857,7 @@ index 538f0f46854..f4715a9224c 100644 arg2 = e1->operands[1].node; progress = true; } -@@ -1706,13 +1707,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -1706,18 +1707,13 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT) { /* x OP (y OP a) -> (x OP y) OP a */ @@ -17190,7 +22872,42 @@ index 538f0f46854..f4715a9224c 100644 arg2 = e2->operands[1].node; progress = true; } -@@ -1757,19 +1752,13 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + +- if (!progress && e1 && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[1].node, arg2))) ++ if (!progress && e1 && e1->op == op ++ && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[1].node, arg2))) + { + /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */ + arg1 = e1->operands[0].node; +@@ -1725,7 +1721,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + progress = true; + } + +- if (!progress && is_op_commutative(op) && e1 ++ if (!progress && is_op_commutative(op) && e1 && e1->op == op + && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[0].node, arg2))) + { + /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */ +@@ -1734,7 +1730,8 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + progress = true; + } + +- if (!progress && e2 && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[0].node))) ++ if (!progress && e2 && e2->op == op ++ && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[0].node))) + { + /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */ + arg1 = tmp; +@@ -1742,7 +1739,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + progress = true; + } + +- if (!progress && is_op_commutative(op) && e2 ++ if (!progress && is_op_commutative(op) && e2 && e2->op == op + && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[1].node))) + { + /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */ +@@ -1757,19 +1754,13 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; struct hlsl_ir_node *res; @@ -17212,10 +22929,155 @@ index 538f0f46854..f4715a9224c 100644 bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b608fae21ac..41aa99fbc09 100644 +index b608fae21ac..72cf53761e4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -1732,8 +1732,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i +@@ -116,6 +116,7 @@ void vsir_program_cleanup(struct vsir_program *program) + shader_signature_cleanup(&program->input_signature); + shader_signature_cleanup(&program->output_signature); + shader_signature_cleanup(&program->patch_constant_signature); ++ vkd3d_shader_free_scan_descriptor_info1(&program->descriptors); + } + + const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( +@@ -469,6 +470,80 @@ static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_d + return false; + } + ++static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_instruction *ins, *ins2; ++ unsigned int tmp_idx = ~0u; ++ unsigned int i, k, r; ++ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (ins->opcode == VKD3DSIH_MOV && ins->dst[0].reg.type == VKD3DSPR_ADDR) ++ { ++ if (tmp_idx == ~0u) ++ tmp_idx = program->temp_count++; ++ ++ ins->opcode = VKD3DSIH_FTOU; ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->dst[0].reg.idx[0].offset = tmp_idx; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ } ++ else if (ins->opcode == VKD3DSIH_MOVA) ++ { ++ if (tmp_idx == ~0u) ++ tmp_idx = program->temp_count++; ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[i]; ++ ins2 = &program->instructions.elements[i + 1]; ++ ++ ins->opcode = VKD3DSIH_ROUND_NE; ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = tmp_idx; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ if (!vsir_instruction_init_with_params(program, ins2, &ins->location, VKD3DSIH_FTOU, 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_register_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins2->dst[0].reg.idx[0].offset = tmp_idx; ++ ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins2->dst[0].write_mask = ins->dst[0].write_mask; ++ ++ vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ ins2->src[0].reg.idx[0].offset = tmp_idx; ++ ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins2->src[0].swizzle = vsir_swizzle_from_writemask(ins2->dst[0].write_mask); ++ } ++ ++ for (k = 0; k < ins->src_count; ++k) ++ { ++ struct vkd3d_shader_src_param *src = &ins->src[k]; ++ ++ for (r = 0; r < src->reg.idx_count; ++r) ++ { ++ struct vkd3d_shader_src_param *rel = src->reg.idx[r].rel_addr; ++ ++ if (rel && rel->reg.type == VKD3DSPR_ADDR) ++ { ++ if (tmp_idx == ~0u) ++ tmp_idx = program->temp_count++; ++ ++ vsir_register_init(&rel->reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ rel->reg.idx[0].offset = tmp_idx; ++ rel->reg.dimension = VSIR_DIMENSION_VEC4; ++ } ++ } ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, + struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, + struct vkd3d_shader_message_context *message_context) +@@ -481,6 +556,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ifc = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -534,6 +610,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program + + if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ texkill = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -620,6 +697,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ mad = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -664,6 +742,7 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ sincos = &instructions->elements[pos]; + + ins = &instructions->elements[pos + 1]; + +@@ -716,6 +795,7 @@ static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ tex = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -1127,6 +1207,7 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra + if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &program->instructions.elements[i]; ++ + vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = 0; +@@ -1345,7 +1426,6 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + loc = ins->location; + if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- + ins = &program->instructions.elements[i]; + + for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) +@@ -1732,8 +1812,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i return VKD3D_OK; } @@ -17236,7 +23098,7 @@ index b608fae21ac..41aa99fbc09 100644 struct vkd3d_shader_instruction_array instructions; enum vkd3d_shader_type shader_type; uint8_t major; -@@ -1751,9 +1763,9 @@ struct io_normaliser +@@ -1751,9 +1843,9 @@ struct io_normaliser struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; @@ -17249,7 +23111,7 @@ index b608fae21ac..41aa99fbc09 100644 bool use_vocp; }; -@@ -1794,36 +1806,44 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade +@@ -1794,36 +1886,44 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade return NULL; } @@ -17308,7 +23170,7 @@ index b608fae21ac..41aa99fbc09 100644 for (i = 0; i < register_count; ++i) { -@@ -1834,21 +1854,31 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u +@@ -1834,21 +1934,31 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u /* A synthetic patch constant range which overlaps an existing range can start upstream of it * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. * The latter is validated in the TPF reader. */ @@ -17346,7 +23208,7 @@ index b608fae21ac..41aa99fbc09 100644 switch (reg->type) { -@@ -1879,9 +1909,21 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, +@@ -1879,9 +1989,21 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, } reg_idx = reg->idx[reg->idx_count - 1].offset; @@ -17371,7 +23233,7 @@ index b608fae21ac..41aa99fbc09 100644 } static int signature_element_mask_compare(const void *a, const void *b) -@@ -1908,11 +1950,12 @@ static bool sysval_semantics_should_merge(const struct signature_element *e, con +@@ -1908,11 +2030,12 @@ static bool sysval_semantics_should_merge(const struct signature_element *e, con } /* Merge tess factor sysvals because they are an array in SPIR-V. */ @@ -17387,7 +23249,7 @@ index b608fae21ac..41aa99fbc09 100644 qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); -@@ -1933,8 +1976,12 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa +@@ -1933,8 +2056,12 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa if (register_count < 2) continue; @@ -17401,7 +23263,7 @@ index b608fae21ac..41aa99fbc09 100644 } static int signature_element_register_compare(const void *a, const void *b) -@@ -1977,62 +2024,19 @@ static int signature_element_index_compare(const void *a, const void *b) +@@ -1977,62 +2104,19 @@ static int signature_element_index_compare(const void *a, const void *b) return vkd3d_u32_compare(e->sort_index, f->sort_index); } @@ -17468,7 +23330,7 @@ index b608fae21ac..41aa99fbc09 100644 if (element_count) memcpy(elements, s->elements, element_count * sizeof(*elements)); -@@ -2091,42 +2095,49 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map +@@ -2091,42 +2175,49 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map s->elements = elements; s->element_count = element_count; @@ -17537,7 +23399,7 @@ index b608fae21ac..41aa99fbc09 100644 } static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, -@@ -2342,8 +2353,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi +@@ -2342,8 +2433,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, struct vsir_transformation_context *ctx) { @@ -17548,7 +23410,7 @@ index b608fae21ac..41aa99fbc09 100644 unsigned int i; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); -@@ -2365,7 +2377,8 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -2365,7 +2457,8 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_control_point_count = ins->declaration.count; break; case VKD3DSIH_DCL_INDEX_RANGE: @@ -17558,7 +23420,7 @@ index b608fae21ac..41aa99fbc09 100644 vkd3d_shader_instruction_make_nop(ins); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: -@@ -2378,12 +2391,14 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -2378,12 +2471,14 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program } } @@ -17577,7 +23439,363 @@ index b608fae21ac..41aa99fbc09 100644 } normaliser.phase = VKD3DSIH_INVALID; -@@ -8505,10 +8520,13 @@ static void vsir_validate_signature_element(struct validation_context *ctx, +@@ -2410,7 +2505,8 @@ struct flat_constants_normaliser + }; + + static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, +- enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) ++ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index, ++ struct vkd3d_shader_src_param **rel_addr) + { + static const struct + { +@@ -2430,12 +2526,8 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * + { + if (reg->type == regs[i].type) + { +- if (reg->idx[0].rel_addr) +- { +- FIXME("Unhandled relative address.\n"); +- return false; +- } +- ++ if (rel_addr) ++ *rel_addr = reg->idx[0].rel_addr; + *set = regs[i].set; + *index = reg->idx[0].offset; + return true; +@@ -2449,10 +2541,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par + const struct flat_constants_normaliser *normaliser) + { + enum vkd3d_shader_d3dbc_constant_register set; ++ struct vkd3d_shader_src_param *rel_addr; + uint32_t index; + size_t i, j; + +- if (!get_flat_constant_register_type(¶m->reg, &set, &index)) ++ if (!get_flat_constant_register_type(¶m->reg, &set, &index, &rel_addr)) + return; + + for (i = 0; i < normaliser->def_count; ++i) +@@ -2470,8 +2563,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par + + param->reg.type = VKD3DSPR_CONSTBUFFER; + param->reg.idx[0].offset = set; /* register ID */ ++ param->reg.idx[0].rel_addr = NULL; + param->reg.idx[1].offset = set; /* register index */ ++ param->reg.idx[1].rel_addr = NULL; + param->reg.idx[2].offset = index; /* buffer index */ ++ param->reg.idx[2].rel_addr = rel_addr; + param->reg.idx_count = 3; + } + +@@ -2498,7 +2594,7 @@ static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_progr + + def = &normaliser.defs[normaliser.def_count++]; + +- get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); ++ get_flat_constant_register_type(&ins->dst[0].reg, &def->set, &def->index, NULL); + for (j = 0; j < 4; ++j) + def->value[j] = ins->src[0].reg.u.immconst_u32[j]; + +@@ -6021,6 +6117,7 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr + uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ const struct vkd3d_shader_location loc = ret->location; + static const struct vkd3d_shader_location no_loc; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; +@@ -6045,9 +6142,10 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr + { + if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = NULL; + ins = &program->instructions.elements[pos]; + +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1); + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; + src_param_init_const_uint(&ins->src[0], 0); + +@@ -6057,20 +6155,20 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr + + if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- ++ ret = NULL; + ins = &program->instructions.elements[pos]; + + switch (ref->data_type) + { + case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: +- vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); ++ vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].float_opcode, 1, 2); + src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); + src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], + VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); + break; + + case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: +- vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); ++ vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].uint_opcode, 1, 2); + src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); + src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], + VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); +@@ -6091,14 +6189,14 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr + ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); + + ++ins; +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1); + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; + src_param_init_ssa_bool(&ins->src[0], program->ssa_count); + + ++program->ssa_count; + + ++ins; +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = colour_signature_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; +@@ -6199,13 +6297,14 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog + uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos) + { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ const struct vkd3d_shader_location loc = ret->location; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int output_idx = 0; + + if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- ++ ret = NULL; + ins = &program->instructions.elements[pos]; + + for (unsigned int i = 0; i < 8; ++i) +@@ -6213,7 +6312,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog + if (!(mask & (1u << i))) + continue; + +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DP4, 1, 2); + src_param_init_temp_float4(&ins->src[0], position_temp); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); + ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; +@@ -6231,7 +6330,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog + ++ins; + } + +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = position_signature_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; +@@ -6388,15 +6487,16 @@ static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *progr + const struct vkd3d_shader_instruction *ret, size_t *ret_pos) + { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ const struct vkd3d_shader_location loc = ret->location; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + + if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- ++ ret = NULL; + ins = &program->instructions.elements[pos]; + +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); +@@ -6525,9 +6625,9 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra + + if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[i + 1]; + + loc = &program->instructions.elements[i].location; +- ins = &program->instructions.elements[i + 1]; + + if (min_parameter) + { +@@ -6725,7 +6825,6 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr + { + if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- + ins = &program->instructions.elements[insert_pos]; + + vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); +@@ -6799,6 +6898,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = NULL; ++ + *ret_pos = pos + 4; + + ssa_temp = program->ssa_count++; +@@ -6829,6 +6930,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = NULL; ++ + *ret_pos = pos + 4; + + ssa_temp = program->ssa_count++; +@@ -6859,6 +6962,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = NULL; ++ + *ret_pos = pos + 5; + + ssa_temp = program->ssa_count++; +@@ -7037,16 +7142,18 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr + { + const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ const struct vkd3d_shader_location loc = ret->location; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + + if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = NULL; + + ins = &program->instructions.elements[pos]; + + /* Write the fog output. */ +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); + src_param_init_temp_float4(&ins->src[0], temp); + if (source == VKD3D_SHADER_FOG_SOURCE_Z) +@@ -7056,7 +7163,7 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr + ++ins; + + /* Write the position or specular output. */ +- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), + source_signature_idx, e->mask); + src_param_init_temp_float4(&ins->src[0], temp); +@@ -7691,6 +7798,54 @@ static void vsir_validate_label_register(struct validation_context *ctx, + reg->idx[0].offset, ctx->program->block_count); + } + ++static void vsir_validate_descriptor_indices(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg, enum vkd3d_shader_descriptor_type type, const char *name) ++{ ++ const struct vkd3d_shader_descriptor_info1 *descriptor; ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL indirect address for the ID of a register of type \"%s\".", name); ++ ++ if (!ctx->program->has_descriptor_info) ++ return; ++ ++ if (!(descriptor = vkd3d_shader_find_descriptor(&ctx->program->descriptors, type, reg->idx[0].offset))) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "No matching descriptor found for register %s%u.", name, reg->idx[0].offset); ++ return; ++ } ++ ++ if (!reg->idx[1].rel_addr && (reg->idx[1].offset < descriptor->register_index ++ || reg->idx[1].offset - descriptor->register_index >= descriptor->count)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Register index %u doesn't belong to the range [%u, %u] for register %s%u.", ++ reg->idx[1].offset, descriptor->register_index, ++ descriptor->register_index + descriptor->count - 1, name, reg->idx[0].offset); ++} ++ ++static void vsir_validate_constbuffer_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, ++ "Invalid precision %#x for a CONSTBUFFER register.", reg->precision); ++ ++ if (reg->dimension != VSIR_DIMENSION_VEC4) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension %#x for a CONSTBUFFER register.", reg->dimension); ++ ++ if (reg->idx_count != 3) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a CONSTBUFFER register.", reg->idx_count); ++ return; ++ } ++ ++ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, "cb"); ++} ++ + static void vsir_validate_sampler_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) + { +@@ -7714,9 +7869,7 @@ static void vsir_validate_sampler_register(struct validation_context *ctx, + return; + } + +- if (reg->idx[0].rel_addr) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, +- "Non-NULL relative address for the descriptor index of a SAMPLER register."); ++ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, "s"); + } + + static void vsir_validate_resource_register(struct validation_context *ctx, +@@ -7741,9 +7894,7 @@ static void vsir_validate_resource_register(struct validation_context *ctx, + return; + } + +- if (reg->idx[0].rel_addr) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, +- "Non-NULL relative address for the descriptor index of a RESOURCE register."); ++ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, "t"); + } + + static void vsir_validate_uav_register(struct validation_context *ctx, +@@ -7773,9 +7924,7 @@ static void vsir_validate_uav_register(struct validation_context *ctx, + return; + } + +- if (reg->idx[0].rel_addr) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, +- "Non-NULL relative address for the descriptor index of a UAV register."); ++ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, "u"); + } + + static void vsir_validate_ssa_register(struct validation_context *ctx, +@@ -7928,6 +8077,10 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_register_without_indices(ctx, reg); + break; + ++ case VKD3DSPR_CONSTBUFFER: ++ vsir_validate_constbuffer_register(ctx, reg); ++ break; ++ + case VKD3DSPR_PRIMID: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -8115,6 +8268,8 @@ static void vsir_validate_dst_param(struct validation_context *ctx, + + case VKD3DSPR_IMMCONST: + case VKD3DSPR_IMMCONST64: ++ case VKD3DSPR_CONSTBUFFER: ++ case VKD3DSPR_IMMCONSTBUFFER: + case VKD3DSPR_SAMPLER: + case VKD3DSPR_RESOURCE: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +@@ -8505,10 +8660,13 @@ static void vsir_validate_signature_element(struct validation_context *ctx, { case VKD3D_SHADER_COMPONENT_INT: case VKD3D_SHADER_COMPONENT_UINT: @@ -17591,6 +23809,97 @@ index b608fae21ac..41aa99fbc09 100644 break; default: +@@ -9776,6 +9934,9 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t + if (program->shader_version.major <= 2) + vsir_transform(&ctx, vsir_program_ensure_diffuse); + ++ if (program->shader_version.major < 4) ++ vsir_transform(&ctx, vsir_program_normalize_addr); ++ + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + vsir_transform(&ctx, vsir_program_remap_output_signature); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index e783128e236..a5d952cd525 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -44,7 +44,6 @@ struct msl_generator + bool write_depth; + + const struct vkd3d_shader_interface_info *interface_info; +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; + }; + + static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, +@@ -821,7 +820,7 @@ static void msl_generate_cbv_declaration(struct msl_generator *gen, + + static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) + { +- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; + const struct vkd3d_shader_descriptor_info1 *descriptor; + struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int i; +@@ -1171,7 +1170,7 @@ static void msl_generate_entrypoint(struct msl_generator *gen) + + vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); + +- if (gen->descriptor_info->descriptor_count) ++ if (gen->program->descriptors.descriptor_count) + { + msl_print_indent(gen->buffer, 2); + /* TODO: Configurable argument buffer binding location. */ +@@ -1195,7 +1194,7 @@ static void msl_generate_entrypoint(struct msl_generator *gen) + vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); +- if (gen->descriptor_info->descriptor_count) ++ if (gen->program->descriptors.descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); + vkd3d_string_buffer_printf(gen->buffer, ");\n"); + +@@ -1234,7 +1233,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader + gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); +- if (gen->descriptor_info->descriptor_count) ++ if (gen->program->descriptors.descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); + vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); + +@@ -1276,7 +1275,6 @@ static void msl_generator_cleanup(struct msl_generator *gen) + + static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + struct vkd3d_shader_message_context *message_context) + { + enum vkd3d_shader_type type = program->shader_version.type; +@@ -1297,13 +1295,11 @@ static int msl_generator_init(struct msl_generator *gen, struct vsir_program *pr + return VKD3D_ERROR_INVALID_SHADER; + } + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); +- gen->descriptor_info = descriptor_info; + + return VKD3D_OK; + } + + int msl_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context) + { +@@ -1314,8 +1310,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, + return ret; + + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); ++ VKD3D_ASSERT(program->has_descriptor_info); + +- if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) ++ if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0) + return ret; + ret = msl_generator_generate(&generator, out); + msl_generator_cleanup(&generator); diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 4a8d0fddae1..d167415c356 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -17625,7 +23934,7 @@ index c6be17bd230..95987831faa 100644 { fclose(f); diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index db7ebab742d..66a4a274f17 100644 +index db7ebab742d..91a6686eb0d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -18,6 +18,7 @@ @@ -17673,7 +23982,7 @@ index db7ebab742d..66a4a274f17 100644 static uint32_t spirv_parser_read_u32(struct spirv_parser *parser) { if (parser->pos >= parser->size) -@@ -290,6 +307,128 @@ static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t m +@@ -290,6 +307,91 @@ static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t m spirv_parser_print_comment(parser, "Generator: Unknown (%#x); %u", id, version); } @@ -17718,49 +24027,12 @@ index db7ebab742d..66a4a274f17 100644 + parser->colours.comment, offset * sizeof(uint32_t), parser->colours.reset, suffix); +} + -+static char get_escape_char(char c) -+{ -+ switch (c) -+ { -+ case '"': -+ case '\\': -+ return c; -+ case '\t': -+ return 't'; -+ case '\n': -+ return 'n'; -+ case '\v': -+ return 'v'; -+ case '\f': -+ return 'f'; -+ case '\r': -+ return 'r'; -+ default: -+ return 0; -+ } -+} -+ +static void spirv_parser_print_string_literal(struct spirv_parser *parser, struct vkd3d_string_buffer *buffer, + const char *prefix, const char *s, size_t len, const char *suffix) +{ -+ size_t start, i; -+ char c; -+ + vkd3d_string_buffer_printf(buffer, "%s\"%s", prefix, parser->colours.literal); -+ for (i = 0, start = 0; i < len; ++i) -+ { -+ if ((c = get_escape_char(s[i]))) -+ { -+ vkd3d_string_buffer_printf(buffer, "%.*s\\%c", (int)(i - start), &s[start], c); -+ start = i + 1; -+ } -+ else if (!isprint(s[i])) -+ { -+ vkd3d_string_buffer_printf(buffer, "%.*s\\%03o", (int)(i - start), &s[start], (uint8_t)s[i]); -+ start = i + 1; -+ } -+ } -+ vkd3d_string_buffer_printf(buffer, "%.*s%s\"%s", (int)(len - start), &s[start], parser->colours.reset, suffix); ++ vkd3d_string_buffer_print_string_escaped(buffer, s, len); ++ vkd3d_string_buffer_printf(buffer, "%s\"%s", parser->colours.reset, suffix); +} + +static const struct spirv_parser_enumerant *spirv_parser_get_enumerant( @@ -17802,7 +24074,7 @@ index db7ebab742d..66a4a274f17 100644 static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser) { uint32_t magic, version, generator, bound, schema; -@@ -357,36 +496,260 @@ static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser) +@@ -357,36 +459,260 @@ static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser) return VKD3D_OK; } @@ -18028,12 +24300,8 @@ index db7ebab742d..66a4a274f17 100644 + goto raw; + } + } while (operand->quantifier == '*' && parser->pos < end); - } -- spirv_parser_print_comment(parser, "%s", op, buffer->buffer); -- vkd3d_string_buffer_release(&parser->string_buffers, buffer); - -- spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -- "Unrecognised instruction %#x.", op); ++ } ++ + if ((rem = end - parser->pos)) + { + spirv_parser_warning(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, @@ -18056,7 +24324,9 @@ index db7ebab742d..66a4a274f17 100644 + result_name->content_size > max_indent ? 0 : max_indent - (int)result_name->content_size, "", + parser->colours.id, result_name->buffer, parser->colours.reset); + vkd3d_string_buffer_release(&parser->string_buffers, result_name); -+ } + } +- spirv_parser_print_comment(parser, "%s", op, buffer->buffer); +- vkd3d_string_buffer_release(&parser->string_buffers, buffer); + else if (parser->formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) + { + vkd3d_string_buffer_printf(parser->text, "%*s", VKD3D_SPIRV_INDENT, ""); @@ -18068,7 +24338,9 @@ index db7ebab742d..66a4a274f17 100644 + vkd3d_string_buffer_printf(parser->text, "\n"); + + vkd3d_string_buffer_release(&parser->string_buffers, operands); -+ + +- spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, +- "Unrecognised instruction %#x.", op); + return VKD3D_OK; + +raw: @@ -18077,7 +24349,7 @@ index db7ebab742d..66a4a274f17 100644 return VKD3D_OK; } -@@ -441,11 +804,19 @@ static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const st +@@ -441,11 +767,19 @@ static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const st { .reset = "", .comment = "", @@ -18097,7 +24369,7 @@ index db7ebab742d..66a4a274f17 100644 }; memset(parser, 0, sizeof(*parser)); -@@ -829,16 +1200,16 @@ static unsigned int vkd3d_spirv_string_word_count(const char *str) +@@ -829,16 +1163,16 @@ static unsigned int vkd3d_spirv_string_word_count(const char *str) static void vkd3d_spirv_build_string(struct vkd3d_spirv_stream *stream, const char *str, unsigned int word_count) { @@ -18123,7 +24395,61 @@ index db7ebab742d..66a4a274f17 100644 } typedef uint32_t (*vkd3d_spirv_build_pfn)(struct vkd3d_spirv_builder *builder); -@@ -3375,7 +3746,8 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, +@@ -2757,9 +3091,6 @@ struct spirv_compiler + } *spirv_parameter_info; + + bool prolog_emitted; +- struct shader_signature input_signature; +- struct shader_signature output_signature; +- struct shader_signature patch_constant_signature; + const struct vkd3d_shader_transform_feedback_info *xfb_info; + struct vkd3d_shader_output_info + { +@@ -2774,7 +3105,6 @@ struct spirv_compiler + + uint32_t binding_idx; + +- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + unsigned int input_control_point_count; + unsigned int output_control_point_count; + +@@ -2852,10 +3182,6 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + + vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + +- shader_signature_cleanup(&compiler->input_signature); +- shader_signature_cleanup(&compiler->output_signature); +- shader_signature_cleanup(&compiler->patch_constant_signature); +- + vkd3d_free(compiler->ssa_register_info); + vkd3d_free(compiler->block_label_ids); + +@@ -2864,7 +3190,6 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + + static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, uint64_t config_flags) + { + const struct vkd3d_shader_interface_info *shader_interface; +@@ -2880,6 +3205,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + compiler->message_context = message_context; + compiler->location.source_name = compile_info->source_name; + compiler->config_flags = config_flags; ++ compiler->program = program; + + if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) + { +@@ -3006,8 +3332,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) + compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count; + +- compiler->scan_descriptor_info = scan_descriptor_info; +- + compiler->phase = VKD3DSIH_INVALID; + + vkd3d_string_buffer_cache_init(&compiler->string_buffers); +@@ -3375,7 +3699,8 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, "Vectors of bool type are not supported."); return vkd3d_spirv_get_op_undef(builder, type_id); default: @@ -18133,7 +24459,72 @@ index db7ebab742d..66a4a274f17 100644 return vkd3d_spirv_get_op_undef(builder, type_id); } -@@ -6904,6 +7276,13 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler +@@ -5471,7 +5796,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + unsigned int array_sizes[2]; + + shader_signature = reg_type == VKD3DSPR_PATCHCONST +- ? &compiler->patch_constant_signature : &compiler->input_signature; ++ ? &compiler->program->patch_constant_signature : &compiler->program->input_signature; + + signature_element = &shader_signature->elements[element_idx]; + sysval = signature_element->sysval_semantic; +@@ -5549,7 +5874,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + if (reg_type == VKD3DSPR_PATCHCONST) + { + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); +- location += shader_signature_next_location(&compiler->input_signature); ++ location += shader_signature_next_location(&compiler->program->input_signature); + } + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); + if (component_idx) +@@ -5683,7 +6008,7 @@ static void calculate_clip_or_cull_distance_mask(const struct signature_element + /* Emits arrayed SPIR-V built-in variables. */ + static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) + { +- const struct shader_signature *output_signature = &compiler->output_signature; ++ const struct shader_signature *output_signature = &compiler->program->output_signature; + uint32_t clip_distance_mask = 0, clip_distance_id = 0; + uint32_t cull_distance_mask = 0, cull_distance_id = 0; + const struct vkd3d_spirv_builtin *builtin; +@@ -5793,7 +6118,8 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + + is_patch_constant = (reg_type == VKD3DSPR_PATCHCONST); + +- shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; ++ shader_signature = is_patch_constant ? &compiler->program->patch_constant_signature ++ : &compiler->program->output_signature; + + signature_element = &shader_signature->elements[element_idx]; + sysval = signature_element->sysval_semantic; +@@ -5867,7 +6193,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + unsigned int location = signature_element->target_location; + + if (is_patch_constant) +- location += shader_signature_next_location(&compiler->output_signature); ++ location += shader_signature_next_location(&compiler->program->output_signature); + else if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL + && signature_element->sysval_semantic == VKD3D_SHADER_SV_TARGET) + location = signature_element->semantic_index; +@@ -6057,7 +6383,8 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * + + is_patch_constant = is_in_fork_or_join_phase(compiler); + +- signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; ++ signature = is_patch_constant ? &compiler->program->patch_constant_signature ++ : &compiler->program->output_signature; + + function_id = compiler->epilogue_function_id; + +@@ -6401,7 +6728,7 @@ static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor + struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, + const struct vkd3d_shader_register_range *range) + { +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = &compiler->program->descriptors; + unsigned int register_last = (range->last == ~0u) ? range->first : range->last; + const struct vkd3d_shader_descriptor_info1 *d; + unsigned int i; +@@ -6904,6 +7231,13 @@ static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol; @@ -18147,11 +24538,158 @@ index db7ebab742d..66a4a274f17 100644 /* Alignment is supported only in the Kernel execution model. */ if (alignment) TRACE("Ignoring alignment %u.\n", alignment); +@@ -10772,20 +11106,20 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) + { + struct vkd3d_shader_dst_param dst; + +- for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) ++ for (unsigned int i = 0; i < compiler->program->input_signature.element_count; ++i) + spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i); + +- for (unsigned int i = 0; i < compiler->output_signature.element_count; ++i) ++ for (unsigned int i = 0; i < compiler->program->output_signature.element_count; ++i) + { + /* PS outputs other than TARGET have dedicated registers and therefore + * go through spirv_compiler_emit_dcl_output() for now. */ + if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL +- && compiler->output_signature.elements[i].sysval_semantic != VKD3D_SHADER_SV_TARGET) ++ && compiler->program->output_signature.elements[i].sysval_semantic != VKD3D_SHADER_SV_TARGET) + continue; + spirv_compiler_emit_output(compiler, VKD3DSPR_OUTPUT, i); + } + +- for (unsigned int i = 0; i < compiler->patch_constant_signature.element_count; ++i) ++ for (unsigned int i = 0; i < compiler->program->patch_constant_signature.element_count; ++i) + { + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_output(compiler, VKD3DSPR_PATCHCONST, i); +@@ -10821,11 +11155,12 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) + + static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) + { ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptors = &compiler->program->descriptors; + unsigned int i; + +- for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) ++ for (i = 0; i < descriptors->descriptor_count; ++i) + { +- const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; ++ const struct vkd3d_shader_descriptor_info1 *descriptor = &descriptors->descriptors[i]; + struct vkd3d_shader_register_range range; + + range.first = descriptor->register_index; +@@ -10856,23 +11191,18 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c + } + } + +-static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, ++static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) + { + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vsir_program *program = compiler->program; + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_spirv_environment environment; + enum vkd3d_result result = VKD3D_OK; + unsigned int i, max_element_count; + +- if ((result = vsir_program_transform(program, compiler->config_flags, +- compile_info, compiler->message_context)) < 0) +- return result; +- +- VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); +- + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); + if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) + return VKD3D_ERROR_OUT_OF_MEMORY; +@@ -10919,17 +11249,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + +- compiler->program = program; +- + instructions = program->instructions; +- memset(&program->instructions, 0, sizeof(program->instructions)); +- +- compiler->input_signature = program->input_signature; +- compiler->output_signature = program->output_signature; +- compiler->patch_constant_signature = program->patch_constant_signature; +- memset(&program->input_signature, 0, sizeof(program->input_signature)); +- memset(&program->output_signature, 0, sizeof(program->output_signature)); +- memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); ++ + compiler->use_vocp = program->use_vocp; + compiler->block_names = program->block_names; + compiler->block_name_count = program->block_name_count; +@@ -10949,8 +11270,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); + } + +- shader_instruction_array_destroy(&instructions); +- + if (result < 0) + return result; + +@@ -11032,21 +11351,26 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + } + + int spirv_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { + struct spirv_compiler *spirv_compiler; + int ret; + ++ if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) ++ return ret; ++ ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); ++ VKD3D_ASSERT(program->has_descriptor_info); ++ + if (!(spirv_compiler = spirv_compiler_create(program, compile_info, +- scan_descriptor_info, message_context, config_flags))) ++ message_context, config_flags))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; + } + +- ret = spirv_compiler_generate_spirv(spirv_compiler, program, compile_info, out); ++ ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, out); + + spirv_compiler_destroy(spirv_compiler); + return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 82302aac666..267f0884d83 100644 +index 82302aac666..23dab35a288 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -1051,7 +1051,8 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins +@@ -714,6 +714,22 @@ input_primitive_type_table[] = + [VKD3D_SM4_INPUT_PT_TRIANGLEADJ] = {6, VKD3D_PT_TRIANGLELIST_ADJ}, + }; + ++static const enum vkd3d_sm4_input_primitive_type sm4_input_primitive_type_table[] = ++{ ++ [VKD3D_PT_POINTLIST] = VKD3D_SM4_INPUT_PT_POINT, ++ [VKD3D_PT_LINELIST] = VKD3D_SM4_INPUT_PT_LINE, ++ [VKD3D_PT_TRIANGLELIST] = VKD3D_SM4_INPUT_PT_TRIANGLE, ++ [VKD3D_PT_LINELIST_ADJ] = VKD3D_SM4_INPUT_PT_LINEADJ, ++ [VKD3D_PT_TRIANGLELIST_ADJ] = VKD3D_SM4_INPUT_PT_TRIANGLEADJ, ++}; ++ ++static const enum vkd3d_sm4_output_primitive_type sm4_output_primitive_type_table[] = ++{ ++ [VKD3D_PT_POINTLIST] = VKD3D_SM4_OUTPUT_PT_POINTLIST, ++ [VKD3D_PT_LINESTRIP] = VKD3D_SM4_OUTPUT_PT_LINESTRIP, ++ [VKD3D_PT_TRIANGLESTRIP] = VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP, ++}; ++ + static const enum vkd3d_shader_resource_type resource_type_table[] = + { + /* 0 */ VKD3D_SHADER_RESOURCE_NONE, +@@ -1051,7 +1067,8 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins register_idx, register_count, write_mask, e->sysval_semantic); return; } @@ -18161,7 +24699,90 @@ index 82302aac666..267f0884d83 100644 { WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", register_idx, register_count, write_mask); -@@ -3228,6 +3229,7 @@ static int signature_element_pointer_compare(const void *x, const void *y) +@@ -1076,6 +1093,8 @@ static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled output primitive type %#x.\n", primitive_type); ++ ++ priv->p.program->output_topology = ins->declaration.primitive_type.type; + } + + static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1103,6 +1122,8 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled input primitive type %#x.\n", primitive_type); ++ ++ program->input_primitive = ins->declaration.primitive_type.type; + } + + static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1113,6 +1134,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i + ins->declaration.count = *tokens; + if (opcode == VKD3D_SM4_OP_DCL_TEMPS) + program->temp_count = max(program->temp_count, *tokens); ++ else if (opcode == VKD3D_SM4_OP_DCL_VERTICES_OUT) ++ program->vertices_out_count = *tokens; + } + + static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1720,7 +1743,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX,VKD3D_SM4_SWIZZLE_VEC4}, +- {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID, VKD3D_SM4_SWIZZLE_VEC4}, ++ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID, VKD3D_SM4_SWIZZLE_SCALAR}, + {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, +@@ -2990,6 +3013,7 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, ++ {"sv_gsinstanceid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_GSINSTID, false}, + + {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, +@@ -3070,7 +3094,8 @@ static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semant + + bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, +- const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_patch) ++ const char *semantic_name, unsigned int semantic_idx, bool output, ++ bool is_patch_constant_func, bool is_primitive) + { + unsigned int i; + +@@ -3094,9 +3119,8 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + + {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, + +- {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, +- {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, ++ {"sv_gsinstanceid", false, VKD3D_SHADER_TYPE_GEOMETRY, ~0u}, + + {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, +@@ -3133,7 +3157,7 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + }; + bool has_sv_prefix = !ascii_strncasecmp(semantic_name, "sv_", 3); + +- if (is_patch) ++ if (is_primitive) + { + VKD3D_ASSERT(!output); + +@@ -3197,6 +3221,8 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + + if (has_sv_prefix) + return false; ++ if (!output && version->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ return false; + + *sysval_semantic = VKD3D_SHADER_SV_NONE; + return true; +@@ -3228,6 +3254,7 @@ static int signature_element_pointer_compare(const void *x, const void *y) static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) { @@ -18169,7 +24790,7 @@ index 82302aac666..267f0884d83 100644 bool output = tag == TAG_OSGN || (tag == TAG_PCSG && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL); const struct signature_element **sorted_elements; -@@ -3256,12 +3258,16 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si +@@ -3256,12 +3283,16 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si if (sysval >= VKD3D_SHADER_SV_TARGET) sysval = VKD3D_SHADER_SV_NONE; @@ -18186,7 +24807,7 @@ index 82302aac666..267f0884d83 100644 } for (i = 0; i < signature->element_count; ++i) -@@ -3270,9 +3276,21 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si +@@ -3270,9 +3301,21 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si size_t string_offset; string_offset = put_string(&buffer, element->semantic_name); @@ -18209,7 +24830,108 @@ index 82302aac666..267f0884d83 100644 add_section(tpf, tag, &buffer); vkd3d_free(sorted_elements); } -@@ -4233,6 +4251,9 @@ static void tpf_write_sfi0(struct tpf_compiler *tpf) +@@ -3444,12 +3487,16 @@ static void sm4_write_register_index(const struct tpf_compiler *tpf, const struc + unsigned int j) + { + unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); ++ const struct vkd3d_shader_register_index *idx = ®->idx[j]; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + unsigned int k; + ++ if (!addressing || (addressing & VKD3D_SM4_ADDRESSING_OFFSET)) ++ put_u32(buffer, idx->offset); ++ + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { +- const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; ++ const struct vkd3d_shader_src_param *idx_src = idx->rel_addr; + uint32_t idx_src_token; + + VKD3D_ASSERT(idx_src); +@@ -3464,10 +3511,6 @@ static void sm4_write_register_index(const struct tpf_compiler *tpf, const struc + VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); + } + } +- else +- { +- put_u32(tpf->buffer, reg->idx[j].offset); +- } + } + + static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) +@@ -3912,6 +3955,57 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler + write_sm4_instruction(tpf, &instr); + } + ++static void tpf_write_dcl_input_primitive(const struct tpf_compiler *tpf, enum vkd3d_primitive_type input_primitive, ++ unsigned int patch_vertex_count) ++{ ++ enum vkd3d_sm4_input_primitive_type sm4_input_primitive; ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, ++ }; ++ ++ if (input_primitive == VKD3D_PT_PATCH) ++ { ++ VKD3D_ASSERT(patch_vertex_count >= 1 && patch_vertex_count <= 32); ++ sm4_input_primitive = VKD3D_SM5_INPUT_PT_PATCH1 + patch_vertex_count - 1; ++ } ++ else ++ { ++ VKD3D_ASSERT(input_primitive < ARRAY_SIZE(sm4_input_primitive_type_table)); ++ sm4_input_primitive = sm4_input_primitive_type_table[input_primitive]; ++ } ++ ++ instr.extra_bits = sm4_input_primitive << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_output_topology(const struct tpf_compiler *tpf, enum vkd3d_primitive_type output_topology) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, ++ }; ++ ++ VKD3D_ASSERT(output_topology < ARRAY_SIZE(sm4_output_primitive_type_table)); ++ instr.extra_bits = sm4_output_primitive_type_table[output_topology] << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_vertices_out(const struct tpf_compiler *tpf, unsigned int count) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_DCL_VERTICES_OUT, ++ ++ .idx = {count}, ++ .idx_count = 1, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ + static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { + struct sm4_instruction_modifier *modifier; +@@ -4215,6 +4309,13 @@ static void tpf_write_shdr(struct tpf_compiler *tpf) + tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); + tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); + } ++ else if (version->type == VKD3D_SHADER_TYPE_GEOMETRY) ++ { ++ tpf_write_dcl_input_primitive(tpf, program->input_primitive, program->input_control_point_count); ++ if (program->output_topology != VKD3D_PT_UNDEFINED) ++ tpf_write_dcl_output_topology(tpf, program->output_topology); ++ tpf_write_dcl_vertices_out(tpf, program->vertices_out_count); ++ } + + tpf_write_program(tpf, program); + +@@ -4233,6 +4334,9 @@ static void tpf_write_sfi0(struct tpf_compiler *tpf) if (tpf->program->features.rovs) *flags |= DXBC_SFI0_REQUIRES_ROVS; @@ -18220,7 +24942,7 @@ index 82302aac666..267f0884d83 100644 * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 021691bb3a1..c990b496545 100644 +index 021691bb3a1..9191429c439 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ @@ -18232,7 +24954,68 @@ index 021691bb3a1..c990b496545 100644 static inline int char_to_int(char c) { if ('0' <= c && c <= '9') -@@ -454,8 +456,15 @@ struct shader_dump_data +@@ -161,6 +163,60 @@ int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d) + return ret; + } + ++static char get_escape_char(char c) ++{ ++ switch (c) ++ { ++ case '"': ++ case '\\': ++ return c; ++ case '\t': ++ return 't'; ++ case '\n': ++ return 'n'; ++ case '\v': ++ return 'v'; ++ case '\f': ++ return 'f'; ++ case '\r': ++ return 'r'; ++ default: ++ return 0; ++ } ++} ++ ++int vkd3d_string_buffer_print_string_escaped(struct vkd3d_string_buffer *buffer, const char *s, size_t len) ++{ ++ size_t content_size, start, i; ++ int ret; ++ char c; ++ ++ content_size = buffer->content_size; ++ for (i = 0, start = 0; i < len; ++i) ++ { ++ if ((c = get_escape_char(s[i]))) ++ { ++ if ((ret = vkd3d_string_buffer_printf(buffer, "%.*s\\%c", (int)(i - start), &s[start], c)) < 0) ++ goto fail; ++ start = i + 1; ++ } ++ else if (!isprint(s[i])) ++ { ++ if ((ret = vkd3d_string_buffer_printf(buffer, "%.*s\\%03o", ++ (int)(i - start), &s[start], (uint8_t)s[i])) < 0) ++ goto fail; ++ start = i + 1; ++ } ++ } ++ if ((ret = vkd3d_string_buffer_printf(buffer, "%.*s", (int)(len - start), &s[start])) < 0) ++ goto fail; ++ return ret; ++ ++fail: ++ buffer->content_size = content_size; ++ return ret; ++} ++ + void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function) + { + vkd3d_shader_trace_text_(buffer->buffer, buffer->content_size, function); +@@ -454,8 +510,15 @@ struct shader_dump_data const char *target_suffix; }; @@ -18249,7 +25032,7 @@ index 021691bb3a1..c990b496545 100644 { static const char hexadecimal_digits[] = "0123456789abcdef"; const uint8_t *checksum = dump_data->checksum; -@@ -480,8 +489,10 @@ static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, +@@ -480,8 +543,10 @@ static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, if (dump_data->profile) pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); @@ -18261,7 +25044,7 @@ index 021691bb3a1..c990b496545 100644 else pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix); -@@ -737,12 +748,20 @@ void vkd3d_shader_free_messages(char *messages) +@@ -737,12 +802,20 @@ void vkd3d_shader_free_messages(char *messages) static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, const struct shader_signature *src) { @@ -18285,7 +25068,7 @@ index 021691bb3a1..c990b496545 100644 signature->elements = NULL; return true; } -@@ -750,30 +769,25 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig +@@ -750,30 +823,25 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) return false; @@ -18330,7 +25113,117 @@ index 021691bb3a1..c990b496545 100644 } return true; -@@ -1631,7 +1645,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char +@@ -1059,7 +1127,7 @@ static void vkd3d_shader_scan_combined_sampler_declaration( + &semantic->resource.range, semantic->resource_type, VKD3D_SHADER_RESOURCE_DATA_FLOAT); + } + +-static const struct vkd3d_shader_descriptor_info1 *find_descriptor( ++const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( + const struct vkd3d_shader_scan_descriptor_info1 *info, + enum vkd3d_shader_descriptor_type type, unsigned int register_id) + { +@@ -1113,11 +1181,11 @@ static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_co + if (dynamic_resource || dynamic_sampler) + return; + +- if ((d = find_descriptor(context->scan_descriptor_info, ++ if ((d = vkd3d_shader_find_descriptor(context->scan_descriptor_info, + VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource->idx[0].offset))) + resource_space = d->register_space; + +- if (sampler && (d = find_descriptor(context->scan_descriptor_info, ++ if (sampler && (d = vkd3d_shader_find_descriptor(context->scan_descriptor_info, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler->idx[0].offset))) + sampler_space = d->register_space; + } +@@ -1501,7 +1569,7 @@ static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descri + return VKD3D_OK; + } + +-static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) ++void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) + { + TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); + +@@ -1509,12 +1577,10 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des + } + + static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, +- struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) ++ struct vkd3d_shader_message_context *message_context, bool add_descriptor_info) + { + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; +- struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; + struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; + struct vkd3d_shader_instruction *instruction; +@@ -1523,29 +1589,25 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + unsigned int i; + + descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); +- if (descriptor_info1) +- { +- descriptor_info1->descriptors = NULL; +- descriptor_info1->descriptor_count = 0; +- } +- else if (descriptor_info) +- { +- descriptor_info1 = &local_descriptor_info1; +- } ++ if (descriptor_info) ++ add_descriptor_info = true; ++ + signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); + + if ((combined_sampler_info = vkd3d_find_struct(compile_info->next, SCAN_COMBINED_RESOURCE_SAMPLER_INFO))) + { + combined_sampler_info->combined_samplers = NULL; + combined_sampler_info->combined_sampler_count = 0; +- if (!descriptor_info1) +- descriptor_info1 = &local_descriptor_info1; ++ add_descriptor_info = true; + } + + tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); + + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, +- descriptor_info1, combined_sampler_info, message_context); ++ add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context); ++ ++ if (add_descriptor_info) ++ program->has_descriptor_info = true; + + if (TRACE_ON()) + vsir_program_trace(program); +@@ -1585,7 +1647,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + } + + if (!ret && descriptor_info) +- ret = convert_descriptor_info(descriptor_info, descriptor_info1); ++ ret = convert_descriptor_info(descriptor_info, &program->descriptors); + + if (!ret && tessellation_info) + { +@@ -1599,15 +1661,10 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + vkd3d_shader_free_scan_combined_resource_sampler_info(combined_sampler_info); + if (descriptor_info) + vkd3d_shader_free_scan_descriptor_info(descriptor_info); +- if (descriptor_info1) +- vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); + if (signature_info) + vkd3d_shader_free_scan_signature_info(signature_info); + } +- else +- { +- vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); +- } ++ + vkd3d_shader_scan_context_cleanup(&context); + return ret; + } +@@ -1631,7 +1688,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_message_context_init(&message_context, compile_info->log_level); fill_shader_dump_data(compile_info, &dump_data); @@ -18339,7 +25232,59 @@ index 021691bb3a1..c990b496545 100644 if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { -@@ -1711,7 +1725,8 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1645,7 +1702,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + + if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) + { +- ret = vsir_program_scan(&program, compile_info, &message_context, NULL); ++ ret = vsir_program_scan(&program, compile_info, &message_context, false); + vsir_program_cleanup(&program); + } + } +@@ -1662,7 +1719,6 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_scan_combined_resource_sampler_info combined_sampler_info; +- struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; + struct vkd3d_shader_compile_info scan_info; + int ret; + +@@ -1678,28 +1734,24 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + combined_sampler_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO; + combined_sampler_info.next = scan_info.next; + scan_info.next = &combined_sampler_info; +- if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) + return ret; +- ret = glsl_compile(program, config_flags, &scan_descriptor_info, ++ ret = glsl_compile(program, config_flags, + &combined_sampler_info, compile_info, out, message_context); + vkd3d_shader_free_scan_combined_resource_sampler_info(&combined_sampler_info); +- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + case VKD3D_SHADER_TARGET_SPIRV_TEXT: +- if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) + return ret; +- ret = spirv_compile(program, config_flags, &scan_descriptor_info, +- compile_info, out, message_context); +- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); ++ ret = spirv_compile(program, config_flags, compile_info, out, message_context); + break; + + case VKD3D_SHADER_TARGET_MSL: +- if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) + return ret; +- ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, out, message_context); +- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); ++ ret = msl_compile(program, config_flags, compile_info, out, message_context); + break; + + default: +@@ -1711,7 +1763,8 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, } static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, @@ -18349,7 +25294,7 @@ index 021691bb3a1..c990b496545 100644 { struct vkd3d_shader_code preprocessed; int ret; -@@ -1719,6 +1734,8 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, +@@ -1719,6 +1772,8 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, if ((ret = preproc_lexer_parse(compile_info, &preprocessed, message_context))) return ret; @@ -18358,7 +25303,7 @@ index 021691bb3a1..c990b496545 100644 ret = hlsl_compile_shader(&preprocessed, compile_info, out, message_context); vkd3d_shader_free_shader_code(&preprocessed); -@@ -1745,11 +1762,11 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1745,11 +1800,11 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_message_context_init(&message_context, compile_info->log_level); fill_shader_dump_data(compile_info, &dump_data); @@ -18372,7 +25317,7 @@ index 021691bb3a1..c990b496545 100644 } else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX) { -@@ -1768,7 +1785,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1768,7 +1823,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, } if (ret >= 0) @@ -18381,7 +25326,7 @@ index 021691bb3a1..c990b496545 100644 vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) -@@ -1961,9 +1978,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns +@@ -1961,9 +2016,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns VKD3D_SHADER_SOURCE_DXBC_TPF, VKD3D_SHADER_SOURCE_HLSL, VKD3D_SHADER_SOURCE_D3D_BYTECODE, @@ -18391,7 +25336,17 @@ index 021691bb3a1..c990b496545 100644 VKD3D_SHADER_SOURCE_FX, }; -@@ -2012,7 +2027,6 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1996,6 +2049,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + VKD3D_SHADER_TARGET_SPIRV_BINARY, + #if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + VKD3D_SHADER_TARGET_SPIRV_TEXT, ++#endif ++#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL ++ VKD3D_SHADER_TARGET_GLSL, + #endif + VKD3D_SHADER_TARGET_D3D_ASM, + VKD3D_SHADER_TARGET_D3D_BYTECODE, +@@ -2012,7 +2068,6 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, }; @@ -18399,7 +25354,7 @@ index 021691bb3a1..c990b496545 100644 static const enum vkd3d_shader_target_type dxbc_dxil_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -@@ -2021,7 +2035,6 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -2021,7 +2076,6 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( # endif VKD3D_SHADER_TARGET_D3D_ASM, }; @@ -18407,7 +25362,7 @@ index 021691bb3a1..c990b496545 100644 static const enum vkd3d_shader_target_type fx_types[] = { -@@ -2044,11 +2057,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -2044,11 +2098,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( *count = ARRAY_SIZE(d3dbc_types); return d3dbc_types; @@ -18420,7 +25375,7 @@ index 021691bb3a1..c990b496545 100644 case VKD3D_SHADER_SOURCE_FX: *count = ARRAY_SIZE(fx_types); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 03643acff3c..eb50da28d24 100644 +index 03643acff3c..bf794d5e936 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -59,7 +59,7 @@ @@ -18441,7 +25396,17 @@ index 03643acff3c..eb50da28d24 100644 VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, -@@ -261,6 +263,8 @@ enum vkd3d_shader_error +@@ -167,6 +169,9 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, + VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040, + VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH = 5041, ++ VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT = 5042, ++ VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE = 5043, ++ VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT = 5044, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -261,6 +266,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, VKD3D_SHADER_ERROR_FX_INVALID_DATA = 11002, @@ -18450,6 +25415,128 @@ index 03643acff3c..eb50da28d24 100644 }; enum vkd3d_shader_opcode +@@ -1415,6 +1422,33 @@ enum vsir_normalisation_level + VSIR_NORMALISED_SM6, + }; + ++struct vkd3d_shader_descriptor_info1 ++{ ++ enum vkd3d_shader_descriptor_type type; ++ unsigned int register_space; ++ unsigned int register_index; ++ unsigned int register_id; ++ enum vkd3d_shader_resource_type resource_type; ++ enum vkd3d_shader_resource_data_type resource_data_type; ++ unsigned int flags; ++ unsigned int sample_count; ++ unsigned int buffer_size; ++ unsigned int structure_stride; ++ unsigned int count; ++ uint32_t uav_flags; ++}; ++ ++struct vkd3d_shader_scan_descriptor_info1 ++{ ++ struct vkd3d_shader_descriptor_info1 *descriptors; ++ unsigned int descriptor_count; ++}; ++ ++const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( ++ const struct vkd3d_shader_scan_descriptor_info1 *info, ++ enum vkd3d_shader_descriptor_type type, unsigned int register_id); ++void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info); ++ + struct vsir_program + { + struct vkd3d_shader_version shader_version; +@@ -1424,6 +1458,9 @@ struct vsir_program + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; + ++ struct vkd3d_shader_scan_descriptor_info1 descriptors; ++ bool has_descriptor_info; ++ + unsigned int parameter_count; + const struct vkd3d_shader_parameter1 *parameters; + bool free_parameters; +@@ -1445,6 +1482,9 @@ struct vsir_program + enum vkd3d_tessellator_domain tess_domain; + enum vkd3d_shader_tessellator_partitioning tess_partitioning; + enum vkd3d_shader_tessellator_output_primitive tess_output_primitive; ++ enum vkd3d_primitive_type input_primitive, output_topology; ++ unsigned int vertices_out_count; ++ + uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; + + struct vsir_features features; +@@ -1501,28 +1541,6 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_pr + void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); + +-struct vkd3d_shader_descriptor_info1 +-{ +- enum vkd3d_shader_descriptor_type type; +- unsigned int register_space; +- unsigned int register_index; +- unsigned int register_id; +- enum vkd3d_shader_resource_type resource_type; +- enum vkd3d_shader_resource_data_type resource_data_type; +- unsigned int flags; +- unsigned int sample_count; +- unsigned int buffer_size; +- unsigned int structure_stride; +- unsigned int count; +- uint32_t uav_flags; +-}; +- +-struct vkd3d_shader_scan_descriptor_info1 +-{ +- struct vkd3d_shader_descriptor_info1 *descriptors; +- unsigned int descriptor_count; +-}; +- + void vsir_program_trace(const struct vsir_program *program); + + const char *shader_get_type_prefix(enum vkd3d_shader_type type); +@@ -1558,6 +1576,7 @@ void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); + void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); + int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); + int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); ++int vkd3d_string_buffer_print_string_escaped(struct vkd3d_string_buffer *buffer, const char *s, size_t len); + int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) VKD3D_PRINTF_FUNC(2, 3); + void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct vkd3d_string_buffer *buffer); + #define vkd3d_string_buffer_trace(buffer) \ +@@ -1642,7 +1661,8 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); + bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, +- const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_patch); ++ const char *semantic_name, unsigned int semantic_idx, bool output, ++ bool is_patch_constant_func, bool is_primitive); + + int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); +@@ -1665,7 +1685,6 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + + int glsl_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +@@ -1673,12 +1692,10 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + #define SPIRV_MAX_SRC_COUNT 6 + + int spirv_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + + int msl_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); + diff --git a/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c b/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c index 62dd5f69f77..d59a133c3d4 100644 --- a/libs/vkd3d/libs/vkd3d-utils/vkd3d_utils_main.c diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch deleted file mode 100644 index aa2e96bf..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch +++ /dev/null @@ -1,3494 +0,0 @@ -From 83a3253798716f86bf8ace1e2cf0fbbd829e614a Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 14 Mar 2025 08:23:50 +1100 -Subject: [PATCH] Updated vkd3d to a4f58be00c58e06b5bd60bec7eb9e37b6f112c24. - ---- - libs/vkd3d/include/private/vkd3d_version.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 76 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 31 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 5 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 735 +++++--------- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 893 ++++++++++++++---- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 10 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 10 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 3 + - 10 files changed, 1057 insertions(+), 717 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_version.h b/libs/vkd3d/include/private/vkd3d_version.h -index a73ae3fb03c..795bc2dc490 100644 ---- a/libs/vkd3d/include/private/vkd3d_version.h -+++ b/libs/vkd3d/include/private/vkd3d_version.h -@@ -1 +1 @@ --#define VKD3D_VCS_ID " (git 81dc67b1)" -+#define VKD3D_VCS_ID " (git a4f58be0)" -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index ab6604bd703..a47c2feb094 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -269,15 +269,15 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - vkd3d_string_buffer_printf(buffer, "", reg->type); - break; - } -- if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) -+ if (reg->idx[0].rel_addr) - { - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled constant buffer register indirect addressing."); - vkd3d_string_buffer_printf(buffer, "", reg->type); - break; - } -- vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]", -- gen->prefix, reg->idx[0].offset, reg->idx[2].offset); -+ vkd3d_string_buffer_printf(buffer, "%s_cb_%u", gen->prefix, reg->idx[0].offset); -+ shader_glsl_print_subscript(buffer, gen, reg->idx[2].rel_addr, reg->idx[2].offset); - break; - - case VKD3DSPR_THREADID: -@@ -485,8 +485,7 @@ static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, stru - vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer); - if (offset) - vkd3d_string_buffer_printf(buffer, " + %u", offset); -- else -- vkd3d_string_buffer_printf(buffer, "]"); -+ vkd3d_string_buffer_printf(buffer, "]"); - glsl_src_cleanup(&r, &gen->string_buffers); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index a7641a203f3..01586592b25 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -234,6 +234,33 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) - return 1; - } - -+const struct hlsl_type *hlsl_get_stream_output_type(const struct hlsl_type *type) -+{ -+ unsigned int i; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ return hlsl_get_stream_output_type(type->e.array.type); -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_type *field_type = hlsl_get_stream_output_type(type->e.record.fields[i].type); -+ -+ if (field_type) -+ return field_type; -+ } -+ return NULL; -+ -+ case HLSL_CLASS_STREAM_OUTPUT: -+ return type; -+ -+ default: -+ return NULL; -+ } -+} -+ - bool hlsl_type_is_resource(const struct hlsl_type *type) - { - switch (type->class) -@@ -298,6 +325,12 @@ bool hlsl_type_is_patch_array(const struct hlsl_type *type) - || type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT); - } - -+bool hlsl_type_is_primitive_array(const struct hlsl_type *type) -+{ -+ return type->class == HLSL_CLASS_ARRAY && (type->e.array.array_type != HLSL_ARRAY_GENERIC -+ || (type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK)); -+} -+ - bool hlsl_base_type_is_integer(enum hlsl_base_type type) - { - switch (type) -@@ -739,8 +772,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - return offset[*regset]; - } - --static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, -- unsigned int path_len) -+bool hlsl_init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) - { - deref->var = var; - deref->path_len = path_len; -@@ -798,7 +830,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - } - load = hlsl_ir_load(ptr); - -- if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) -+ if (!hlsl_init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) - return false; - - for (i = 0; i < load->src.path_len; ++i) -@@ -867,7 +899,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - ++path_len; - } - -- if (!init_deref(ctx, deref, prefix->var, prefix->path_len + path_len)) -+ if (!hlsl_init_deref(ctx, deref, prefix->var, prefix->path_len + path_len)) - return false; - - deref_path_len = 0; -@@ -1133,6 +1165,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - return 1; - -@@ -1140,7 +1173,6 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -- case HLSL_CLASS_STREAM_OUTPUT: - break; - } - -@@ -1459,7 +1491,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc - - VKD3D_ASSERT(!hlsl_deref_is_lowered(other)); - -- if (!init_deref(ctx, deref, other->var, other->path_len)) -+ if (!hlsl_init_deref(ctx, deref, other->var, other->path_len)) - return false; - - for (i = 0; i < deref->path_len; ++i) -@@ -1521,7 +1553,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls - return NULL; - init_node(&store->node, HLSL_IR_STORE, NULL, loc); - -- if (!init_deref(ctx, &store->lhs, lhs->var, lhs->path_len + !!idx)) -+ if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, lhs->path_len + !!idx)) - { - vkd3d_free(store); - return NULL; -@@ -1857,7 +1889,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl - return NULL; - init_node(&load->node, HLSL_IR_LOAD, type, loc); - -- if (!init_deref(ctx, &load->src, deref->var, deref->path_len + !!idx)) -+ if (!hlsl_init_deref(ctx, &load->src, deref->var, deref->path_len + !!idx)) - { - vkd3d_free(load); - return NULL; -@@ -1939,7 +1971,7 @@ struct hlsl_ir_node *hlsl_block_add_load_component(struct hlsl_ctx *ctx, struct - return &load->node; - } - --struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, -+static struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_resource_load *load; -@@ -1978,6 +2010,12 @@ struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - return &load->node; - } - -+struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) -+{ -+ return append_new_instr(ctx, block, hlsl_new_resource_load(ctx, params, loc)); -+} -+ - static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, - struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) - { -@@ -2022,6 +2060,12 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - return &swizzle->node; - } - -+struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, -+ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) -+{ -+ return append_new_instr(ctx, block, hlsl_new_swizzle(ctx, s, width, val, loc)); -+} -+ - struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, - unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) - { -@@ -2367,7 +2411,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, - - VKD3D_ASSERT(!hlsl_deref_is_lowered(src)); - -- if (!init_deref(ctx, dst, src->var, src->path_len)) -+ if (!hlsl_init_deref(ctx, dst, src->var, src->path_len)) - return false; - - for (i = 0; i < src->path_len; ++i) -@@ -3203,6 +3247,16 @@ struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, uint3 - vkd3d_string_buffer_printf(string, "row_major "); - if (modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - vkd3d_string_buffer_printf(string, "column_major "); -+ if (modifiers & HLSL_PRIMITIVE_POINT) -+ vkd3d_string_buffer_printf(string, "point "); -+ if (modifiers & HLSL_PRIMITIVE_LINE) -+ vkd3d_string_buffer_printf(string, "line "); -+ if (modifiers & HLSL_PRIMITIVE_TRIANGLE) -+ vkd3d_string_buffer_printf(string, "triangle "); -+ if (modifiers & HLSL_PRIMITIVE_LINEADJ) -+ vkd3d_string_buffer_printf(string, "lineadj "); -+ if (modifiers & HLSL_PRIMITIVE_TRIANGLEADJ) -+ vkd3d_string_buffer_printf(string, "triangleadj "); - if ((modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT)) == (HLSL_STORAGE_IN | HLSL_STORAGE_OUT)) - vkd3d_string_buffer_printf(string, "inout "); - else if (modifiers & HLSL_STORAGE_IN) -@@ -4736,6 +4790,8 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - ctx->output_primitive = 0; - ctx->partitioning = 0; - ctx->input_control_point_count = UINT_MAX; -+ ctx->max_vertex_count = 0; -+ ctx->input_primitive_type = VKD3D_PT_UNDEFINED; - - return true; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 2ef84d35ff2..98d3d17e826 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -417,6 +417,11 @@ struct hlsl_attribute - #define HLSL_STORAGE_ANNOTATION 0x00080000 - #define HLSL_MODIFIER_UNORM 0x00100000 - #define HLSL_MODIFIER_SNORM 0x00200000 -+#define HLSL_PRIMITIVE_POINT 0x00400000 -+#define HLSL_PRIMITIVE_LINE 0x00800000 -+#define HLSL_PRIMITIVE_TRIANGLE 0x01000000 -+#define HLSL_PRIMITIVE_LINEADJ 0x02000000 -+#define HLSL_PRIMITIVE_TRIANGLEADJ 0x04000000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -427,6 +432,9 @@ struct hlsl_attribute - - #define HLSL_MODIFIERS_MAJORITY_MASK (HLSL_MODIFIER_ROW_MAJOR | HLSL_MODIFIER_COLUMN_MAJOR) - -+#define HLSL_PRIMITIVE_MODIFIERS_MASK (HLSL_PRIMITIVE_POINT | HLSL_PRIMITIVE_LINE | HLSL_PRIMITIVE_TRIANGLE | \ -+ HLSL_PRIMITIVE_LINEADJ | HLSL_PRIMITIVE_TRIANGLEADJ) -+ - #define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0 - - /* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a -@@ -1184,10 +1192,18 @@ struct hlsl_ctx - unsigned int input_control_point_count; - struct hlsl_type *input_control_point_type; - -+ /* The first declared input primitive parameter in tessellation and geometry shaders. */ -+ struct hlsl_ir_var *input_primitive_param; -+ - /* Whether the current function being processed during HLSL codegen is - * the patch constant function in a hull shader. */ - bool is_patch_constant_func; - -+ /* The maximum output vertex count of a geometry shader. */ -+ unsigned int max_vertex_count; -+ /* The input primitive type of a geometry shader. */ -+ enum vkd3d_primitive_type input_primitive_type; -+ - /* In some cases we generate opcodes by parsing an HLSL function and then - * invoking it. If not NULL, this field is the name of the function that we - * are currently parsing, "mangled" with an internal prefix to avoid -@@ -1458,6 +1474,11 @@ static inline bool hlsl_is_numeric_type(const struct hlsl_type *type) - return type->class <= HLSL_CLASS_LAST_NUMERIC; - } - -+static inline bool hlsl_is_vec1(const struct hlsl_type *type) -+{ -+ return type->class == HLSL_CLASS_SCALAR || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); -+} -+ - static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) - { - switch (dim) -@@ -1530,6 +1551,8 @@ struct hlsl_ir_node *hlsl_block_add_load_index(struct hlsl_ctx *ctx, struct hlsl - void hlsl_block_add_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_block *iter, struct hlsl_block *body, enum hlsl_loop_unroll_type unroll_type, - unsigned int unroll_limit, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); - void hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *resource, - struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_block_add_simple_load(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1541,6 +1564,8 @@ void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *blo - void hlsl_block_add_store_index(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, - unsigned int writemask, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, -+ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, - unsigned int n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1567,6 +1592,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); - -+bool hlsl_init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len); - bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); - bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); - -@@ -1653,8 +1679,6 @@ struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interl - struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, - unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); --struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, -- const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); - struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count); - struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, -@@ -1719,6 +1743,7 @@ bool hlsl_type_is_integer(const struct hlsl_type *type); - bool hlsl_type_is_resource(const struct hlsl_type *type); - bool hlsl_type_is_shader(const struct hlsl_type *type); - bool hlsl_type_is_patch_array(const struct hlsl_type *type); -+bool hlsl_type_is_primitive_array(const struct hlsl_type *type); - unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); - bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); - -@@ -1727,6 +1752,8 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx); - const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); - unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); - -+const struct hlsl_type *hlsl_get_stream_output_type(const struct hlsl_type *type); -+ - uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim); - unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); - uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 605a9abaa93..d9fd43b5e78 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -106,6 +106,8 @@ inline {return KW_INLINE; } - inout {return KW_INOUT; } - InputPatch {return KW_INPUTPATCH; } - LineStream {return KW_LINESTREAM; } -+line {return KW_LINE; } -+lineadj {return KW_LINEADJ; } - linear {return KW_LINEAR; } - matrix {return KW_MATRIX; } - namespace {return KW_NAMESPACE; } -@@ -119,6 +121,7 @@ pass {return KW_PASS; } - PixelShader {return KW_PIXELSHADER; } - PointStream {return KW_POINTSTREAM; } - pixelshader {return KW_PIXELSHADER; } -+point {return KW_POINT; } - RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } - RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } - RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } -@@ -175,6 +178,8 @@ TextureCube {return KW_TEXTURECUBE; } - textureCUBE {return KW_TEXTURECUBE; } - TextureCubeArray {return KW_TEXTURECUBEARRAY; } - TriangleStream {return KW_TRIANGLESTREAM; } -+triangle {return KW_TRIANGLE; } -+triangleadj {return KW_TRIANGLEADJ; } - true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } - unsigned {return KW_UNSIGNED; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 71802fce388..ff3d58da8f4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -372,7 +372,15 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - if (node->type == HLSL_IR_SAMPLER_STATE && dst_type->class == HLSL_CLASS_SAMPLER) - return node; - -- if (!implicit_compatible_data_types(ctx, src_type, dst_type)) -+ if (implicit_compatible_data_types(ctx, src_type, dst_type)) -+ { -+ if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) -+ && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy -+ && ctx->warn_implicit_truncation) -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", -+ src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); -+ } -+ else - { - struct vkd3d_string_buffer *src_string, *dst_string; - -@@ -383,19 +391,12 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - "Can't implicitly convert from %s to %s.", src_string->buffer, dst_string->buffer); - hlsl_release_string_buffer(ctx, src_string); - hlsl_release_string_buffer(ctx, dst_string); -- return NULL; - } - -- if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) -- && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy -- && ctx->warn_implicit_truncation) -- hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", -- src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); -- - return add_cast(ctx, block, node, dst_type, loc); - } - --static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, -+static void add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_type *dst_type, const struct parse_array_sizes *arrays, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *instr = node_from_block(block); -@@ -414,7 +415,7 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo - } - - if (instr->data_type->class == HLSL_CLASS_ERROR) -- return true; -+ return; - - if (!explicit_compatible_data_types(ctx, src_type, dst_type)) - { -@@ -427,10 +428,9 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo - src_string->buffer, dst_string->buffer); - hlsl_release_string_buffer(ctx, src_string); - hlsl_release_string_buffer(ctx, dst_string); -- return false; - } - -- return add_cast(ctx, block, instr, dst_type, loc); -+ add_cast(ctx, block, instr, dst_type, loc); - } - - static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, -@@ -449,7 +449,7 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t - return modifiers | mod; - } - --static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) -+static void append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) - { - struct hlsl_ir_node *condition, *cast, *not; - struct hlsl_block then_block; -@@ -457,7 +457,7 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - - /* E.g. "for (i = 0; ; ++i)". */ - if (list_empty(&cond_block->instrs)) -- return true; -+ return; - - condition = node_from_block(cond_block); - -@@ -466,15 +466,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); - /* We already checked for a 1-component numeric type, so - * add_implicit_conversion() is equivalent to add_cast() here. */ -- if (!(cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc))) -- return false; -- -+ cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc); - not = hlsl_block_add_unary_expr(ctx, cond_block, HLSL_OP1_LOGIC_NOT, cast, &condition->loc); - - hlsl_block_init(&then_block); - hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &condition->loc); - hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, &condition->loc); -- return true; - } - - static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) -@@ -518,11 +515,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block - { - if (!hlsl_clone_block(ctx, &cond_block, cond)) - return; -- if (!append_conditional_break(ctx, &cond_block)) -- { -- hlsl_block_cleanup(&cond_block); -- return; -- } -+ append_conditional_break(ctx, &cond_block); - list_move_before(&instr->entry, &cond_block.instrs); - } - } -@@ -598,11 +591,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - return ret; - hlsl_block_add_block(&expr, block); - -- if (!(node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc))) -- { -- hlsl_block_cleanup(&expr); -- return ret; -- } -+ node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc); - - /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ - hlsl_src_from_node(&src, node); -@@ -705,8 +694,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type - if (!init && !(init = make_empty_block(ctx))) - goto oom; - -- if (!append_conditional_break(ctx, cond)) -- goto oom; -+ append_conditional_break(ctx, cond); - - if (type == HLSL_LOOP_DO_WHILE) - list_move_tail(&body->instrs, &cond->instrs); -@@ -850,9 +838,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - if (return_value->data_type->class == HLSL_CLASS_ERROR) - return true; - -- if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) -- return false; -- -+ return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc); - hlsl_block_add_simple_store(ctx, block, ctx->cur_function->return_var, return_value); - } - else -@@ -931,10 +917,8 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str - return false; - } - -- if (!(index = add_implicit_conversion(ctx, block, index, -- hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) -- return false; -- -+ index = add_implicit_conversion(ctx, block, index, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc); - hlsl_block_add_index(ctx, block, array, index, loc); - return true; - } -@@ -1196,6 +1180,14 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, - return true; - } - -+static void check_invalid_stream_output_object(struct hlsl_ctx *ctx, const struct hlsl_type *type, -+ const char *name, const struct vkd3d_shader_location* loc) -+{ -+ if (hlsl_type_component_count(type) != 1) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Stream output object '%s' is not single-element.", name); -+} -+ - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, - bool is_default_values_initializer); -@@ -1230,6 +1222,9 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Output parameter '%s' has a default value.", param->name); - -+ if (hlsl_get_stream_output_type(param->type)) -+ check_invalid_stream_output_object(ctx, param->type, param->name, loc); -+ - if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, - ¶m->reg_reservation))) - return false; -@@ -1246,9 +1241,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - - if (!param->initializer.braces) - { -- if (!(add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc))) -- return false; -- -+ add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc); - param->initializer.args[0] = node_from_block(param->initializer.instrs); - } - -@@ -1645,10 +1638,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); -- -- if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) -- return NULL; -- -+ args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc); - return add_expr(ctx, block, op, args, bool_type, loc); - } - -@@ -1678,12 +1668,8 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - return block->value; - } - -- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) -- return NULL; -- -- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) -- return NULL; -- -+ args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc); -+ args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc); - return add_expr(ctx, block, op, args, common_type, loc); - } - -@@ -1714,12 +1700,8 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) -- return NULL; -- -- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) -- return NULL; -- -+ args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc); -+ args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc); - return add_expr(ctx, block, op, args, return_type, loc); - } - -@@ -1737,12 +1719,8 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct - - common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) -- return NULL; -- -- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) -- return NULL; -- -+ args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc); -+ args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc); - return add_expr(ctx, block, op, args, common_type, loc); - } - -@@ -1768,12 +1746,8 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h - return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) -- return NULL; -- -- if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) -- return NULL; -- -+ args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc); -+ args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc); - return add_expr(ctx, block, op, args, return_type, loc); - } - -@@ -1821,12 +1795,8 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls - common_type = hlsl_get_vector_type(ctx, base, dim); - ret_type = hlsl_get_scalar_type(ctx, base); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -- return NULL; -- -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -- return NULL; -- -+ args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc); -+ args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc); - return add_expr(ctx, instrs, op, args, ret_type, loc); - } - -@@ -2022,8 +1992,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - width = size; - } - -- if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) -- return false; -+ rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc); - - while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) - { -@@ -2053,7 +2022,6 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - else if (lhs->type == HLSL_IR_SWIZZLE) - { - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); -- struct hlsl_ir_node *new_swizzle; - uint32_t s; - - VKD3D_ASSERT(!matrix_writemask); -@@ -2084,13 +2052,9 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - } - } - -- if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) -- return false; -- hlsl_block_add_instr(block, new_swizzle); -- -+ rhs = hlsl_block_add_swizzle(ctx, block, s, width, rhs, &swizzle->node.loc); - lhs = swizzle->val.node; - lhs_type = hlsl_get_vector_type(ctx, lhs_type->e.numeric.type, width); -- rhs = new_swizzle; - } - else - { -@@ -2102,8 +2066,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - /* lhs casts could have resulted in a discrepancy between the - * rhs->data_type and the type of the variable that will be ulimately - * stored to. This is corrected. */ -- if (!(rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc))) -- return false; -+ rhs = add_cast(ctx, block, rhs, lhs_type, &rhs->loc); - - if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) - { -@@ -2325,9 +2288,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - } - else - { -- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -- return; -- -+ conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc); - hlsl_block_add_store_component(ctx, instrs, &dst_deref, *store_index, conv); - } - } -@@ -2401,10 +2362,10 @@ static bool type_has_numeric_components(struct hlsl_type *type) - return false; - } - --static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, -+static void check_invalid_non_parameter_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, - const struct vkd3d_shader_location *loc) - { -- modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -+ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT | HLSL_PRIMITIVE_MODIFIERS_MASK); - if (modifiers) - { - struct vkd3d_string_buffer *string; -@@ -2438,6 +2399,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - bool constant_buffer = false; - struct hlsl_ir_var *var; - struct hlsl_type *type; -+ bool stream_output; - char *var_name; - unsigned int i; - -@@ -2529,6 +2491,10 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); - } - -+ stream_output = !!hlsl_get_stream_output_type(type); -+ if (stream_output) -+ check_invalid_stream_output_object(ctx, type, v->name, &v->loc); -+ - if (!(var_name = vkd3d_strdup(v->name))) - return; - -@@ -2583,6 +2549,10 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -+ if (stream_output) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT, -+ "Stream output object '%s' is not allowed in the global scope.", var->name); -+ - if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) - && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) - { -@@ -2713,15 +2683,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - } - - if (!v->initializer.braces) -- { -- if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) -- { -- free_parse_variable_def(v); -- continue; -- } -- -- v->initializer.args[0] = node_from_block(v->initializer.instrs); -- } -+ v->initializer.args[0] = add_implicit_conversion(ctx, -+ v->initializer.instrs, v->initializer.args[0], type, &v->loc); - - if (var->data_type->class != HLSL_CLASS_ERROR) - initialize_var(ctx, var, &v->initializer, is_default_values_initializer); -@@ -2755,13 +2718,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - } - - zero = hlsl_block_add_uint_constant(ctx, &ctx->static_initializers, 0, &var->loc); -- -- if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) -- { -- free_parse_variable_def(v); -- continue; -- } -- -+ cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc); - hlsl_block_add_simple_store(ctx, &ctx->static_initializers, var, cast); - } - free_parse_variable_def(v); -@@ -3033,13 +2990,7 @@ static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, - if (param->storage_modifiers & HLSL_STORAGE_IN) - { - if (!hlsl_types_are_equal(arg->data_type, param->data_type)) -- { -- struct hlsl_ir_node *cast; -- -- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -- return NULL; -- arg = cast; -- } -+ arg = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc); - - hlsl_block_add_simple_store(ctx, args->instrs, param, arg); - } -@@ -3125,21 +3076,13 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - return add_implicit_conversion(ctx, params->instrs, arg, type, loc); - } - --static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+static void convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, - struct hlsl_type *type, const struct vkd3d_shader_location *loc) - { - unsigned int i; - - for (i = 0; i < params->args_count; ++i) -- { -- struct hlsl_ir_node *new_arg; -- -- if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) -- return false; -- params->args[i] = new_arg; -- } -- -- return true; -+ params->args[i] = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc); - } - - static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, -@@ -3200,7 +3143,8 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, - if (!(common_type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - -- return convert_args(ctx, params, common_type, loc); -+ convert_args(ctx, params, common_type, loc); -+ return true; - } - - static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, -@@ -3213,7 +3157,8 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - if (hlsl_type_is_integer(type)) - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); - -- return convert_args(ctx, params, type, loc); -+ convert_args(ctx, params, type, loc); -+ return true; - } - - static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, -@@ -3226,7 +3171,8 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, - - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); - -- return convert_args(ctx, params, type, loc); -+ convert_args(ctx, params, type, loc); -+ return true; - } - - static bool intrinsic_abs(struct hlsl_ctx *ctx, -@@ -3263,8 +3209,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, - - const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - type = arg->data_type; - - if (!(body = hlsl_sprintf_alloc(ctx, template, -@@ -3318,9 +3263,7 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - struct hlsl_type *bool_type; - - bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); -- if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) -- return false; -- -+ cast = add_cast(ctx, params->instrs, arg, bool_type, loc); - add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); - return true; - } -@@ -3332,9 +3275,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer * - struct hlsl_type *bool_type; - - bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); -- if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) -- return false; -- -+ cast = add_cast(ctx, params->instrs, arg, bool_type, loc); - add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); - return true; - } -@@ -3522,10 +3463,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, - static bool intrinsic_ceil(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg; -- -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ struct hlsl_ir_node *arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_CEIL, arg, loc); - } -@@ -3572,10 +3510,7 @@ static bool intrinsic_clip(struct hlsl_ctx *ctx, - static bool intrinsic_cos(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg; -- -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ struct hlsl_ir_node *arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); - } -@@ -3596,8 +3531,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, - static const char fn_name_sinh[] = "sinh"; - static const char fn_name_cosh[] = "cosh"; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - type_name = arg->data_type->name; - fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; -@@ -3635,32 +3569,18 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - - cast_type = hlsl_get_vector_type(ctx, base, 3); - -- if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) -- return false; -- -- if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) -- return false; -- -- if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, arg1_swzl1); -- -- if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, arg2_swzl1); -+ arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc); -+ arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc); -+ arg1_swzl1 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc); -+ arg2_swzl1 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc); - - if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) - return false; - - mul1_neg = hlsl_block_add_unary_expr(ctx, params->instrs, HLSL_OP1_NEG, mul1, loc); - -- if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, arg1_swzl2); -- -- if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, arg2_swzl2); -+ arg1_swzl2 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc); -+ arg2_swzl2 = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc); - - if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) - return false; -@@ -3673,8 +3593,7 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); - } -@@ -3684,8 +3603,7 @@ static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); - } -@@ -3695,8 +3613,7 @@ static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); - } -@@ -3706,8 +3623,7 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); - } -@@ -3717,8 +3633,7 @@ static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); - } -@@ -3728,8 +3643,7 @@ static bool intrinsic_degrees(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg, *deg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - /* 1 rad = 180/pi degree = 57.2957795 degree */ - deg = hlsl_block_add_float_constant(ctx, params->instrs, 57.2957795f, loc); -@@ -3741,8 +3655,7 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); - } -@@ -3796,8 +3709,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, - return false; - } - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, arg, loc); - - dim = min(type->e.numeric.dimx, type->e.numeric.dimy); - if (dim == 1) -@@ -3839,11 +3751,8 @@ static bool intrinsic_distance(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg1, *arg2, *neg, *add, *dot; - -- if (!(arg1 = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -- -- if (!(arg2 = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) -- return false; -+ arg1 = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); -+ arg2 = intrinsic_float_convert_arg(ctx, params, params->args[1], loc); - - if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, arg2, loc))) - return false; -@@ -3912,8 +3821,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg, *mul, *coeff; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - /* 1/ln(2) */ - coeff = hlsl_block_add_float_constant(ctx, params->instrs, 1.442695f, loc); -@@ -3929,8 +3837,7 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); - } -@@ -3998,8 +3905,7 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); - } -@@ -4011,11 +3917,8 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; - static const struct hlsl_constant_value zero_value; - -- if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -- -- if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) -- return false; -+ x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); -+ y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc); - - if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) - return false; -@@ -4050,8 +3953,7 @@ static bool intrinsic_frac(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, arg, loc); - } -@@ -4126,8 +4028,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, - hlsl_release_string_buffer(ctx, string); - } - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; -@@ -4189,8 +4090,7 @@ static bool intrinsic_log(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *log, *arg, *coeff; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) - return false; -@@ -4205,8 +4105,7 @@ static bool intrinsic_log10(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *log, *arg, *coeff; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) - return false; -@@ -4221,8 +4120,7 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); - } -@@ -4330,11 +4228,8 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - ret_type = hlsl_get_scalar_type(ctx, base); - } - -- if (!(cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc))) -- return false; -- -- if (!(cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc))) -- return false; -+ cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc); -+ cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc); - - if (!(var = hlsl_new_synthetic_var(ctx, "mul", matrix_type, loc))) - return false; -@@ -4375,7 +4270,8 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - } - - load = hlsl_block_add_simple_load(ctx, params->instrs, var, loc); -- return !!add_implicit_conversion(ctx, params->instrs, load, ret_type, loc); -+ add_implicit_conversion(ctx, params->instrs, load, ret_type, loc); -+ return true; - } - - static bool intrinsic_normalize(struct hlsl_ctx *ctx, -@@ -4394,8 +4290,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, - hlsl_release_string_buffer(ctx, string); - } - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; -@@ -4428,8 +4323,7 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg, *rad; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - /* 1 degree = pi/180 rad = 0.0174532925f rad */ - rad = hlsl_block_add_float_constant(ctx, params->instrs, 0.0174532925f, loc); -@@ -4441,8 +4335,7 @@ static bool intrinsic_rcp(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); - } -@@ -4473,7 +4366,6 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - { - struct hlsl_type *type, *scalar_type; - struct hlsl_ir_function_decl *func; -- struct hlsl_ir_node *index; - char *body; - - static const char template[] = -@@ -4503,9 +4395,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - * which we will only use the first component of. */ - - scalar_type = hlsl_get_scalar_type(ctx, params->args[2]->data_type->e.numeric.type); -- if (!(index = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc))) -- return false; -- params->args[2] = index; -+ params->args[2] = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc); - - if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) - return false; -@@ -4528,8 +4418,7 @@ static bool intrinsic_round(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ROUND, arg, loc); - } -@@ -4539,8 +4428,7 @@ static bool intrinsic_rsqrt(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RSQ, arg, loc); - } -@@ -4550,8 +4438,7 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); - } -@@ -4574,16 +4461,14 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) - return false; - -- if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) -- return false; -+ op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc); - - /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ - - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) - return false; - -- if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) -- return false; -+ op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc); - - if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) - return false; -@@ -4597,8 +4482,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); - } -@@ -4672,8 +4556,7 @@ static bool intrinsic_sqrt(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *arg; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, arg, loc); - } -@@ -4692,7 +4575,8 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, - params->args[1], params->args[0], loc))) - return false; - -- return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); -+ add_implicit_conversion(ctx, params->instrs, ge, type, loc); -+ return true; - } - - static bool intrinsic_tan(struct hlsl_ctx *ctx, -@@ -4726,8 +4610,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, - " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" - "}\n"; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - type = arg->data_type; - - if (!(body = hlsl_sprintf_alloc(ctx, template, -@@ -4748,7 +4631,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - unsigned int sampler_dim = hlsl_sampler_dim_count(dim); - struct hlsl_resource_load_params load_params = { 0 }; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *coords, *sample; -+ struct hlsl_ir_node *coords; - - if (params->args_count != 2 && params->args_count != 4) - { -@@ -4780,47 +4663,27 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - else - load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - -- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) -- return false; -- hlsl_block_add_instr(params->instrs, c); -- -- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- { -- return false; -- } -- -- if (!(lod = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), 1, params->args[1], loc))) -- return false; -- hlsl_block_add_instr(params->instrs, lod); -+ c = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc); -+ coords = add_implicit_conversion(ctx, params->instrs, c, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - -- if (!(load_params.lod = add_implicit_conversion(ctx, params->instrs, lod, -- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -- { -- return false; -- } -+ lod = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(W, W, W, W), 1, params->args[1], loc); -+ load_params.lod = add_implicit_conversion(ctx, params->instrs, lod, -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); - } - else if (!strcmp(name, "tex2Dproj") - || !strcmp(name, "tex3Dproj") - || !strcmp(name, "texCUBEproj")) - { -- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), loc))) -- { -- return false; -- } -+ coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), loc); - - if (hlsl_version_ge(ctx, 4, 0)) - { - struct hlsl_ir_node *divisor; - -- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, divisor); -- -- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, coords); -+ divisor = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc); -+ coords = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc); - - if (!(coords = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, coords, divisor, loc))) - return false; -@@ -4834,35 +4697,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - } - else if (params->args_count == 4) /* Gradient sampling. */ - { -- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- { -- return false; -- } -- -- if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- { -- return false; -- } -- -- if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- { -- return false; -- } -- -+ coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); -+ load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); -+ load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; - } - else - { - load_params.type = HLSL_RESOURCE_SAMPLE; -- -- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- { -- return false; -- } -+ coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - } - - /* tex1D() functions never produce 1D resource declarations. For newer profiles half offset -@@ -4889,9 +4736,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); - load_params.sampling_dim = dim; - -- if (!(sample = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, sample); -+ hlsl_block_add_resource_load(ctx, params->instrs, &load_params, loc); - return true; - } - -@@ -5026,10 +4871,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - static bool intrinsic_trunc(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg; -- -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -+ struct hlsl_ir_node *arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc); - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); - } -@@ -5037,7 +4879,7 @@ static bool intrinsic_trunc(struct hlsl_ctx *ctx, - static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; -+ struct hlsl_ir_node *arg = params->args[0], *ret, *c; - struct hlsl_type *arg_type = arg->data_type; - - if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR -@@ -5054,19 +4896,11 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - return false; - } - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) -- return false; -- -+ arg = intrinsic_float_convert_arg(ctx, params, arg, loc); - c = hlsl_block_add_float_constant(ctx, params->instrs, 255.0f + (0.5f / 256.0f), loc); - - if (arg_type->class == HLSL_CLASS_VECTOR) -- { -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, swizzle); -- -- arg = swizzle; -- } -+ arg = hlsl_block_add_swizzle(ctx, params->instrs, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc); - - if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; -@@ -5603,8 +5437,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - { - cond_type = hlsl_get_numeric_type(ctx, common_type->class, - HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy); -- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -- return false; -+ cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); - } - else - { -@@ -5633,15 +5466,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - - cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, - common_type->e.numeric.dimx, common_type->e.numeric.dimy); -- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -- return false; -+ cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); - } - -- if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -- return false; -- -- if (!(second = add_implicit_conversion(ctx, block, second, common_type, &second->loc))) -- return false; -+ first = add_implicit_conversion(ctx, block, first, common_type, &first->loc); -+ second = add_implicit_conversion(ctx, block, second, common_type, &second->loc); - } - else - { -@@ -5661,9 +5490,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, - cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); -- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -- return false; -- -+ cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc); - common_type = first->data_type; - } - -@@ -5716,7 +5543,6 @@ static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bl - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; -- struct hlsl_ir_node *load; - unsigned int value_dim; - - if (params->args_count != 1 && params->args_count != 2) -@@ -5748,16 +5574,11 @@ static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bl - else - value_dim = 4; - -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -- return false; -- -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); - load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim); - load_params.resource = object; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -5767,7 +5588,6 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_type *object_type = object->data_type; - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; - unsigned int sampler_dim, offset_dim; -- struct hlsl_ir_node *load; - bool multisampled; - - if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -@@ -5794,18 +5614,12 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, - } - - if (multisampled) -- { -- if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) -- return false; -- } -+ load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); - - if (!!offset_dim && params->args_count > 1 + multisampled) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); - - if (params->args_count > 1 + multisampled + !!offset_dim) - { -@@ -5813,16 +5627,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, - } - - /* +1 for the mipmap level for non-multisampled textures */ -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) -- return false; -- -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc); - load_params.format = object_type->e.resource.format; - load_params.resource = object; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -5833,7 +5642,6 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; - unsigned int sampler_dim, offset_dim; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *load; - - sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -@@ -5858,16 +5666,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- return false; -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - - if (offset_dim && params->args_count > 2) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); - - if (params->args_count > 2 + !!offset_dim) - hlsl_fixme(ctx, loc, "Sample() clamp parameter."); -@@ -5877,11 +5681,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - load_params.format = object_type->e.resource.format; - load_params.resource = object; - load_params.sampler = params->args[0]; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -- -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -5892,7 +5692,6 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - struct hlsl_resource_load_params load_params = { 0 }; - unsigned int sampler_dim, offset_dim; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *load; - - sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -@@ -5923,20 +5722,14 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- return false; -- -- if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -- load_params.cmp = params->args[2]; -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); -+ load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); - - if (offset_dim && params->args_count > 3) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); - - if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "%s() clamp parameter.", name); -@@ -5946,11 +5739,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - load_params.format = object_type->e.resource.format; - load_params.resource = object; - load_params.sampler = params->args[0]; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -- -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -5961,7 +5750,6 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - struct hlsl_resource_load_params load_params = {0}; - unsigned int sampler_dim, offset_dim; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *load; - unsigned int read_channel; - - sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -@@ -6015,9 +5803,8 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - } - else if (offset_dim && params->args_count > 2) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -+ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); - } - - sampler_type = params->args[0]->data_type; -@@ -6039,17 +5826,12 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- return false; -- -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); - load_params.resource = object; - load_params.sampler = params->args[0]; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -6060,7 +5842,6 @@ static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - struct hlsl_resource_load_params load_params = {0}; - unsigned int sampler_dim, offset_dim; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *load; - - sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -@@ -6127,10 +5908,7 @@ static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); - load_params.resource = object; - load_params.sampler = params->args[0]; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -6219,12 +5997,8 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc - /* Input parameter. */ - if (iter->args[j] == ARG_MIP_LEVEL) - { -- if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -- { -- return false; -- } -- -+ args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); - continue; - } - -@@ -6263,10 +6037,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc - load_params.resource = object; - load_params.lod = args[ARG_MIP_LEVEL]; - load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); -- -- if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, res_info); -+ res_info = hlsl_block_add_resource_load(ctx, block, &load_params, loc); - - if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) - return false; -@@ -6289,9 +6060,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc - load_params.type = HLSL_RESOURCE_SAMPLE_INFO; - load_params.resource = object; - load_params.format = args[ARG_SAMPLE_COUNT]->data_type; -- if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, sample_info); -+ sample_info = hlsl_block_add_resource_load(ctx, block, &load_params, loc); - - if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info, false)) - return false; -@@ -6308,7 +6077,6 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - struct hlsl_resource_load_params load_params = { 0 }; - unsigned int sampler_dim, offset_dim; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *load; - - sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -@@ -6338,20 +6106,14 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- load_params.coords = params->args[1]; -- -- if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -- load_params.lod = params->args[2]; -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); -+ load_params.lod = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc); - - if (offset_dim && params->args_count > 3) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); - - if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); -@@ -6359,10 +6121,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - load_params.format = object_type->e.resource.format; - load_params.resource = object; - load_params.sampler = params->args[0]; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -6373,7 +6132,6 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block - struct hlsl_resource_load_params load_params = { 0 }; - unsigned int sampler_dim, offset_dim; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_node *load; - - sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -@@ -6400,24 +6158,16 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- load_params.coords = params->args[1]; -- -- if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- load_params.ddx = params->args[2]; -- -- if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- load_params.ddy = params->args[3]; -+ load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); -+ load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); -+ load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc); - - if (offset_dim && params->args_count > 4) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+ load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc); - - if (params->args_count > 4 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); -@@ -6425,10 +6175,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block - load_params.format = object_type->e.resource.format; - load_params.resource = object; - load_params.sampler = params->args[0]; -- -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- hlsl_block_add_instr(block, load); -+ hlsl_block_add_resource_load(ctx, block, &load_params, loc); - return true; - } - -@@ -6455,13 +6202,10 @@ static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block - else - value_dim = 4; - -- if (!(offset = add_implicit_conversion(ctx, block, params->args[0], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -- return false; -- -- if (!(rhs = add_implicit_conversion(ctx, block, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc))) -- return false; -+ offset = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ rhs = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc); - - if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object)) - return false; -@@ -6667,15 +6411,8 @@ static bool add_switch(struct hlsl_ctx *ctx, struct hlsl_block *block, - return true; - } - -- if (!(selector = add_implicit_conversion(ctx, block, selector, -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &selector->loc))) -- { -- destroy_switch_cases(cases); -- destroy_block(block); -- cleanup_parse_attribute_list(attributes); -- return false; -- } -- -+ selector = add_implicit_conversion(ctx, block, selector, -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &selector->loc); - s = hlsl_new_switch(ctx, selector, cases, loc); - - destroy_switch_cases(cases); -@@ -6816,6 +6553,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_INLINE - %token KW_INOUT - %token KW_INPUTPATCH -+%token KW_LINE -+%token KW_LINEADJ - %token KW_LINEAR - %token KW_LINESTREAM - %token KW_MATRIX -@@ -6828,6 +6567,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_PACKOFFSET - %token KW_PASS - %token KW_PIXELSHADER -+%token KW_POINT - %token KW_POINTSTREAM - %token KW_RASTERIZERORDEREDBUFFER - %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER -@@ -6878,6 +6618,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_TEXTURE3D - %token KW_TEXTURECUBE - %token KW_TEXTURECUBEARRAY -+%token KW_TRIANGLE -+%token KW_TRIANGLEADJ - %token KW_TRIANGLESTREAM - %token KW_TRUE - %token KW_TYPEDEF -@@ -7885,7 +7627,8 @@ parameter: - parameter_decl: - var_modifiers type_no_void any_identifier arrays colon_attributes - { -- uint32_t modifiers = $1; -+ uint32_t prim_modifiers = $1 & HLSL_PRIMITIVE_MODIFIERS_MASK; -+ uint32_t modifiers = $1 & ~HLSL_PRIMITIVE_MODIFIERS_MASK; - struct hlsl_type *type; - unsigned int i; - -@@ -7910,6 +7653,22 @@ parameter_decl: - } - vkd3d_free($4.sizes); - -+ if (prim_modifiers && (prim_modifiers & (prim_modifiers - 1))) -+ { -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Primitive type modifiers are mutually exclusive."); -+ prim_modifiers = 0; -+ } -+ -+ if (prim_modifiers) -+ { -+ if (type->class != HLSL_CLASS_ARRAY) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Primitive type modifiers can only be applied to arrays."); -+ else -+ type->modifiers |= prim_modifiers; -+ } -+ - $$.type = type; - - if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -@@ -8629,7 +8388,7 @@ variable_def_typed: - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) - YYABORT; - -- check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ check_invalid_non_parameter_modifiers(ctx, modifiers, &@1); - - $$ = $3; - $$->basic_type = type; -@@ -8644,7 +8403,7 @@ variable_def_typed: - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) - YYABORT; - -- check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ check_invalid_non_parameter_modifiers(ctx, modifiers, &@1); - - $$ = $3; - $$->basic_type = type; -@@ -8785,6 +8544,26 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, &@1); - } -+ | KW_LINE var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_LINE, &@1); -+ } -+ | KW_LINEADJ var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_LINEADJ, &@1); -+ } -+ | KW_POINT var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_POINT, &@1); -+ } -+ | KW_TRIANGLE var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_TRIANGLE, &@1); -+ } -+ | KW_TRIANGLEADJ var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_PRIMITIVE_TRIANGLEADJ, &@1); -+ } - | var_identifier var_modifiers - { - $$ = $2; -@@ -8981,14 +8760,7 @@ selection_statement: - - check_condition_type(ctx, condition); - -- if (!(condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4))) -- { -- destroy_block($6.then_block); -- destroy_block($6.else_block); -- cleanup_parse_attribute_list(&$1); -- YYABORT; -- } -- -+ condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4); - hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, &@2); - - destroy_block($6.then_block); -@@ -9449,12 +9221,7 @@ unary_expr: - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on casts."); - -- if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) -- { -- destroy_block($6); -- vkd3d_free($4.sizes); -- YYABORT; -- } -+ add_explicit_conversion(ctx, $6, $3, &$4, &@3); - vkd3d_free($4.sizes); - $$ = $6; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index edcd9ce62a7..b5652475b43 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1214,7 +1214,6 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim); - struct hlsl_ir_node *coords = index->idx.node; - struct hlsl_resource_load_params params = {0}; -- struct hlsl_ir_node *resource_load; - - VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); - VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -@@ -1227,10 +1226,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - params.resource = val; - params.coords = coords; - params.format = val->data_type->e.resource.format; -- -- if (!(resource_load = hlsl_new_resource_load(ctx, ¶ms, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, resource_load); -+ hlsl_block_add_resource_load(ctx, block, ¶ms, &instr->loc); - return true; - } - -@@ -1295,7 +1291,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s - - if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1) - { -- struct hlsl_ir_node *new_cast, *swizzle; -+ struct hlsl_ir_node *new_cast; - - dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->e.numeric.type); - /* We need to preserve the cast since it might be doing more than just -@@ -1303,12 +1299,8 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s - new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_scalar_type, &cast->node.loc); - - if (dst_type->e.numeric.dimx != 1) -- { -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), -- dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) -- return false; -- hlsl_block_add_instr(block, swizzle); -- } -+ hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), -+ dst_type->e.numeric.dimx, new_cast, &cast->node.loc); - - return true; - } -@@ -1400,6 +1392,17 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in - * - * we can copy-prop the load (@7) into a constant vector {123, 456}, but we - * cannot easily vectorize the stores @3 and @6. -+ * -+ * Moreover, we implement a transformation that propagates loads with a single -+ * non-constant index in its deref path. Consider a load of the form -+ * var[[a0][a1]...[i]...[an]], where ak are integral constants, and i is an -+ * arbitrary non-constant node. If, for all j, the following holds: -+ * -+ * var[[a0][a1]...[j]...[an]] = x[[c0*j + d0][c1*j + d1]...[cm*j + dm]], -+ * -+ * where ck, dk are constants, then we can replace the load with -+ * x[[c0*i + d0]...[cm*i + dm]]. This pass is implemented by -+ * copy_propagation_replace_with_deref(). - */ - - struct copy_propagation_value -@@ -1624,16 +1627,25 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ - - if (path_node->type == HLSL_IR_CONSTANT) - { -+ uint32_t index = hlsl_ir_constant(path_node)->value.u[0].u; -+ -+ /* Don't bother invalidating anything if the index is constant but -+ * out-of-range. -+ * Such indices are illegal in HLSL, but only if the code is not -+ * dead, and we can't always know if code is dead without copy-prop -+ * itself. */ -+ if (index >= hlsl_type_element_count(type)) -+ return; -+ - copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, -- depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, -- writemask, time); -+ depth + 1, comp_start + index * subtype_comp_count, writemask, time); - } - else - { - for (i = 0; i < hlsl_type_element_count(type); ++i) - { - copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, -- depth + 1, i * subtype_comp_count, writemask, time); -+ depth + 1, comp_start + i * subtype_comp_count, writemask, time); - } - } - } -@@ -1757,6 +1769,325 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, - return true; - } - -+static bool component_index_from_deref_path_node(struct hlsl_ir_node *path_node, -+ struct hlsl_type *type, unsigned int *index) -+{ -+ unsigned int idx, i; -+ -+ if (path_node->type != HLSL_IR_CONSTANT) -+ return false; -+ -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ *index = 0; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ if (idx >= type->e.numeric.dimx) -+ return false; -+ *index = idx; -+ break; -+ -+ case HLSL_CLASS_MATRIX: -+ if (idx >= hlsl_type_major_size(type)) -+ return false; -+ if (hlsl_type_is_row_major(type)) -+ *index = idx * type->e.numeric.dimx; -+ else -+ *index = idx * type->e.numeric.dimy; -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ return false; -+ *index = idx * hlsl_type_component_count(type->e.array.type); -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < idx; ++i) -+ *index += hlsl_type_component_count(type->e.record.fields[i].type); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ return true; -+} -+ -+static bool nonconst_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -+ unsigned int *idx, unsigned int *base, unsigned int *scale, unsigned int *count) -+{ -+ struct hlsl_type *type = deref->var->data_type; -+ bool found = false; -+ unsigned int i; -+ -+ *base = 0; -+ -+ for (i = 0; i < deref->path_len; ++i) -+ { -+ struct hlsl_ir_node *path_node = deref->path[i].node; -+ struct hlsl_type *next_type; -+ -+ VKD3D_ASSERT(path_node); -+ -+ /* We should always have generated a cast to UINT. */ -+ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ -+ next_type = hlsl_get_element_type_from_path_index(ctx, type, path_node); -+ -+ if (path_node->type != HLSL_IR_CONSTANT) -+ { -+ if (found) -+ return false; -+ found = true; -+ *idx = i; -+ *scale = hlsl_type_component_count(next_type); -+ *count = hlsl_type_element_count(type); -+ } -+ else -+ { -+ unsigned int index; -+ -+ if (!component_index_from_deref_path_node(path_node, type, &index)) -+ return false; -+ *base += index; -+ } -+ -+ type = next_type; -+ } -+ -+ return found; -+} -+ -+static struct hlsl_ir_node *new_affine_path_index(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -+ struct hlsl_block *block, struct hlsl_ir_node *index, int c, int d) -+{ -+ struct hlsl_ir_node *c_node, *d_node, *ic, *idx; -+ bool use_uint = c >= 0 && d >= 0; -+ -+ if (!c) -+ { -+ VKD3D_ASSERT(d >= 0); -+ -+ return hlsl_block_add_uint_constant(ctx, block, d, loc); -+ } -+ -+ if (use_uint) -+ { -+ c_node = hlsl_block_add_uint_constant(ctx, block, c, loc); -+ d_node = hlsl_block_add_uint_constant(ctx, block, d, loc); -+ } -+ else -+ { -+ c_node = hlsl_block_add_int_constant(ctx, block, c, loc); -+ d_node = hlsl_block_add_int_constant(ctx, block, d, loc); -+ index = hlsl_block_add_cast(ctx, block, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); -+ } -+ -+ ic = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, index, c_node); -+ idx = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, ic, d_node); -+ if (!use_uint) -+ idx = hlsl_block_add_cast(ctx, block, idx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ -+ return idx; -+} -+ -+static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, -+ const struct copy_propagation_state *state, const struct hlsl_ir_load *load, -+ uint32_t swizzle, struct hlsl_ir_node *instr) -+{ -+ const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); -+ unsigned int nonconst_i = 0, base, scale, count; -+ struct hlsl_ir_node *index, *new_instr = NULL; -+ const struct hlsl_deref *deref = &load->src; -+ const struct hlsl_ir_var *var = deref->var; -+ unsigned int time = load->node.index; -+ struct hlsl_deref tmp_deref = {0}; -+ struct hlsl_ir_load *new_load; -+ struct hlsl_ir_var *x = NULL; -+ int *c = NULL, *d = NULL; -+ uint32_t ret_swizzle = 0; -+ struct hlsl_block block; -+ unsigned int path_len; -+ bool success = false; -+ int i, j, k; -+ -+ if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count)) -+ return false; -+ -+ if (hlsl_version_lt(ctx, 4, 0)) -+ { -+ TRACE("Non-constant index propagation is not yet supported for SM1.\n"); -+ return false; -+ } -+ -+ VKD3D_ASSERT(count); -+ -+ hlsl_block_init(&block); -+ -+ index = deref->path[nonconst_i].node; -+ -+ /* Iterate over the nonconst index, and check if their values all have the form -+ * x[[c0*i + d0][c1*i + d1]...[cm*i + dm]], and determine the constants c, d. */ -+ for (i = 0; i < count; ++i) -+ { -+ unsigned int start = base + scale * i; -+ struct copy_propagation_value *value; -+ struct hlsl_ir_load *idx; -+ uint32_t cur_swizzle = 0; -+ -+ if (!(value = copy_propagation_get_value(state, var, -+ start + hlsl_swizzle_get_component(swizzle, 0), time))) -+ goto done; -+ -+ if (value->node->type != HLSL_IR_LOAD) -+ goto done; -+ idx = hlsl_ir_load(value->node); -+ -+ if (!x) -+ x = idx->src.var; -+ else if (x != idx->src.var) -+ goto done; -+ -+ if (i == 0) -+ { -+ path_len = idx->src.path_len; -+ -+ if (path_len) -+ { -+ if (!(c = hlsl_calloc(ctx, path_len, sizeof(c[0]))) -+ || !(d = hlsl_alloc(ctx, path_len * sizeof(d[0])))) -+ goto done; -+ } -+ -+ for (k = 0; k < path_len; ++k) -+ { -+ if (idx->src.path[k].node->type != HLSL_IR_CONSTANT) -+ goto done; -+ d[k] = hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u; -+ } -+ -+ } -+ else if (i == 1) -+ { -+ struct hlsl_type *type = idx->src.var->data_type; -+ -+ if (idx->src.path_len != path_len) -+ goto done; -+ -+ /* Calculate constants c and d based on the first two path indices. */ -+ for (k = 0; k < path_len; ++k) -+ { -+ int ix; -+ -+ if (idx->src.path[k].node->type != HLSL_IR_CONSTANT) -+ goto done; -+ ix = hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u; -+ c[k] = ix - d[k]; -+ d[k] = ix - c[k] * i; -+ -+ if (c[k] && type->class == HLSL_CLASS_STRUCT) -+ goto done; -+ -+ type = hlsl_get_element_type_from_path_index(ctx, type, idx->src.path[k].node); -+ } -+ } -+ else -+ { -+ if (idx->src.path_len != path_len) -+ goto done; -+ -+ /* Check that this load has the form x[[c0*i +d0][c1*i + d1]...[cm*i + dm]]. */ -+ for (k = 0; k < path_len; ++k) -+ { -+ if (idx->src.path[k].node->type != HLSL_IR_CONSTANT) -+ goto done; -+ if (hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u != c[k] * i + d[k]) -+ goto done; -+ } -+ } -+ -+ hlsl_swizzle_set_component(&cur_swizzle, 0, value->component); -+ -+ for (j = 1; j < instr_component_count; ++j) -+ { -+ struct copy_propagation_value *val; -+ -+ if (!(val = copy_propagation_get_value(state, var, -+ start + hlsl_swizzle_get_component(swizzle, j), time))) -+ goto done; -+ if (val->node != &idx->node) -+ goto done; -+ -+ hlsl_swizzle_set_component(&cur_swizzle, j, val->component); -+ } -+ -+ if (i == 0) -+ ret_swizzle = cur_swizzle; -+ else if (ret_swizzle != cur_swizzle) -+ goto done; -+ } -+ -+ if (!hlsl_init_deref(ctx, &tmp_deref, x, path_len)) -+ goto done; -+ -+ for (k = 0; k < path_len; ++k) -+ { -+ hlsl_src_from_node(&tmp_deref.path[k], -+ new_affine_path_index(ctx, &load->node.loc, &block, index, c[k], d[k])); -+ } -+ -+ if (!(new_load = hlsl_new_load_index(ctx, &tmp_deref, NULL, &load->node.loc))) -+ goto done; -+ new_instr = &new_load->node; -+ hlsl_block_add_instr(&block, new_instr); -+ -+ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) -+ new_instr = hlsl_block_add_swizzle(ctx, &block, ret_swizzle, instr_component_count, new_instr, &instr->loc); -+ -+ if (TRACE_ON()) -+ { -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ -+ vkd3d_string_buffer_printf(&buffer, "Load from %s[", var->name); -+ for (j = 0; j < deref->path_len; ++j) -+ { -+ if (j == nonconst_i) -+ vkd3d_string_buffer_printf(&buffer, "[i]"); -+ else -+ vkd3d_string_buffer_printf(&buffer, "[%u]", hlsl_ir_constant(deref->path[j].node)->value.u[0].u); -+ } -+ vkd3d_string_buffer_printf(&buffer, "]%s propagated as %s[", -+ debug_hlsl_swizzle(swizzle, instr_component_count), tmp_deref.var->name); -+ for (k = 0; k < path_len; ++k) -+ { -+ if (c[k]) -+ vkd3d_string_buffer_printf(&buffer, "[i*%d + %d]", c[k], d[k]); -+ else -+ vkd3d_string_buffer_printf(&buffer, "[%d]", d[k]); -+ } -+ vkd3d_string_buffer_printf(&buffer, "]%s (i = %p).\n", -+ debug_hlsl_swizzle(ret_swizzle, instr_component_count), index); -+ -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ } -+ -+ list_move_before(&instr->entry, &block.instrs); -+ hlsl_replace_node(instr, new_instr); -+ success = true; -+ -+done: -+ hlsl_cleanup_deref(&tmp_deref); -+ hlsl_block_cleanup(&block); -+ vkd3d_free(c); -+ vkd3d_free(d); -+ return success; -+} -+ - static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - struct hlsl_ir_load *load, struct copy_propagation_state *state) - { -@@ -1811,6 +2142,9 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - if (copy_propagation_replace_with_single_instr(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) - return true; - -+ if (copy_propagation_replace_with_deref(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) -+ return true; -+ - return false; - } - -@@ -1829,6 +2163,9 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, - if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) - return true; - -+ if (copy_propagation_replace_with_deref(ctx, state, load, swizzle->u.vector, &swizzle->node)) -+ return true; -+ - return false; - } - -@@ -2167,8 +2504,7 @@ static enum validation_result validate_component_index_range_from_deref(struct h - return DEREF_VALIDATION_NOT_CONSTANT; - - /* We should always have generated a cast to UINT. */ -- VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; - -@@ -2325,11 +2661,6 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - return false; - } - --static bool is_vec1(const struct hlsl_type *type) --{ -- return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); --} -- - static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - if (instr->type == HLSL_IR_EXPR) -@@ -2344,7 +2675,8 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - src_type = expr->operands[0].node->data_type; - - if (hlsl_types_are_equal(src_type, dst_type) -- || (src_type->e.numeric.type == dst_type->e.numeric.type && is_vec1(src_type) && is_vec1(dst_type))) -+ || (src_type->e.numeric.type == dst_type->e.numeric.type -+ && hlsl_is_vec1(src_type) && hlsl_is_vec1(dst_type))) - { - hlsl_replace_node(&expr->node, expr->operands[0].node); - return true; -@@ -2507,18 +2839,14 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR - && dst_type->e.numeric.dimx < src_type->e.numeric.dimx) - { -- struct hlsl_ir_node *new_cast, *swizzle; -+ struct hlsl_ir_node *new_cast; - - dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx); - /* We need to preserve the cast since it might be doing more than just - * narrowing the vector. */ - new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_vector_type, &cast->node.loc); -- -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), -- dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) -- return false; -- hlsl_block_add_instr(block, swizzle); -- -+ hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, Y, Z, W), -+ dst_type->e.numeric.dimx, new_cast, &cast->node.loc); - return true; - } - -@@ -2732,9 +3060,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - return false; - hlsl_block_add_instr(block, &vector_load->node); - -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, swizzle); -+ swizzle = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc); - - value.u[0].u = 0; - value.u[1].u = 1; -@@ -2867,11 +3193,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n - operands[0] = cut_index; - operands[1] = const_i; - equals = hlsl_block_add_expr(ctx, block, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc); -- -- if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), -- var->data_type->e.numeric.dimx, equals, &cut_index->loc))) -- return false; -- hlsl_block_add_instr(block, equals); -+ equals = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), -+ var->data_type->e.numeric.dimx, equals, &cut_index->loc); - - var_load = hlsl_block_add_simple_load(ctx, block, var, &cut_index->loc); - -@@ -3161,9 +3484,37 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) - return false; - } - --/* Turn CAST to int or uint as follows: -+/* Turn CAST to int or uint into TRUNC + REINTERPRET */ -+static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_node *arg, *trunc; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP1_CAST) -+ return false; -+ -+ arg = expr->operands[0].node; -+ if (!hlsl_type_is_integer(instr->data_type) || instr->data_type->e.numeric.type == HLSL_TYPE_BOOL) -+ return false; -+ if (!hlsl_type_is_floating_point(arg->data_type)) -+ return false; -+ -+ trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, arg, &instr->loc); -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = trunc; -+ hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); -+ -+ return true; -+} -+ -+/* Turn TRUNC into: - * -- * CAST(x) = x - FRACT(x) + extra -+ * TRUNC(x) = x - FRACT(x) + extra - * - * where - * -@@ -3171,27 +3522,19 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) - * - * where the comparisons in the extra term are performed using CMP or SLT - * depending on whether this is a pixel or vertex shader, respectively. -- * -- * A REINTERPET (which is written as a mere MOV) is also applied to the final -- * result for type consistency. - */ --static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; - struct hlsl_ir_node *arg, *res; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) - return false; - expr = hlsl_ir_expr(instr); -- if (expr->op != HLSL_OP1_CAST) -+ if (expr->op != HLSL_OP1_TRUNC) - return false; - - arg = expr->operands[0].node; -- if (!hlsl_type_is_integer(instr->data_type) || instr->data_type->e.numeric.type == HLSL_TYPE_BOOL) -- return false; -- if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) -- return false; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - { -@@ -3241,9 +3584,6 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - hlsl_block_add_instr(block, res); - } - -- memset(operands, 0, sizeof(operands)); -- operands[0] = res; -- hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc); - return true; - } - -@@ -3327,16 +3667,10 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h - { - mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, expr->operands[0].node, expr->operands[1].node); - -- if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), -- instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) -- return false; -- hlsl_block_add_instr(block, add_x); -- -- if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), -- instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) -- return false; -- hlsl_block_add_instr(block, add_y); -- -+ add_x = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), -+ instr->data_type->e.numeric.dimx, mul, &expr->node.loc); -+ add_y = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(Y, Y, Y, Y), -+ instr->data_type->e.numeric.dimx, mul, &expr->node.loc); - hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, add_x, add_y); - } - -@@ -3499,9 +3833,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - { - uint32_t s = hlsl_swizzle_from_writemask(1 << i); - -- if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, comps[i]); -+ comps[i] = hlsl_block_add_swizzle(ctx, block, s, 1, reduced, &instr->loc); - } - - if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) -@@ -4021,9 +4353,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - { - uint32_t s = hlsl_swizzle_from_writemask(1 << i); - -- if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, comps[i]); -+ comps[i] = hlsl_block_add_swizzle(ctx, block, s, 1, mult, &instr->loc); - } - - res = comps[0]; -@@ -4812,8 +5142,7 @@ static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) - struct hlsl_type *type; - unsigned int index; - -- if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) -- return; -+ hlsl_regset_index_from_deref(ctx, deref, regset, &index); - - if (regset <= HLSL_REGSET_LAST_OBJECT) - { -@@ -4824,7 +5153,6 @@ static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) - { - type = hlsl_deref_get_type(ctx, deref); - -- hlsl_regset_index_from_deref(ctx, deref, regset, &index); - required_bind_count = align(index + type->reg_size[regset], 4) / 4; - var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); - } -@@ -5913,7 +6241,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - unsigned int *start, unsigned int *count) - { - struct hlsl_type *type = deref->var->data_type; -- unsigned int i, k; -+ unsigned int i; - - *start = 0; - *count = 0; -@@ -5921,49 +6249,18 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - for (i = 0; i < deref->path_len; ++i) - { - struct hlsl_ir_node *path_node = deref->path[i].node; -- unsigned int idx = 0; -+ unsigned int index; - - VKD3D_ASSERT(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - return false; - - /* We should always have generated a cast to UINT. */ -- VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -- -- idx = hlsl_ir_constant(path_node)->value.u[0].u; -- -- switch (type->class) -- { -- case HLSL_CLASS_VECTOR: -- if (idx >= type->e.numeric.dimx) -- return false; -- *start += idx; -- break; -- -- case HLSL_CLASS_MATRIX: -- if (idx >= hlsl_type_major_size(type)) -- return false; -- if (hlsl_type_is_row_major(type)) -- *start += idx * type->e.numeric.dimx; -- else -- *start += idx * type->e.numeric.dimy; -- break; -- -- case HLSL_CLASS_ARRAY: -- if (idx >= type->e.array.elements_count) -- return false; -- *start += idx * hlsl_type_component_count(type->e.array.type); -- break; -+ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - -- case HLSL_CLASS_STRUCT: -- for (k = 0; k < idx; ++k) -- *start += hlsl_type_component_count(type->e.record.fields[k].type); -- break; -- -- default: -- vkd3d_unreachable(); -- } -+ if (!component_index_from_deref_path_node(path_node, type, &index)) -+ return false; -+ *start += index; - - type = hlsl_get_element_type_from_path_index(ctx, type, path_node); - } -@@ -5992,8 +6289,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - if (path_node->type == HLSL_IR_CONSTANT) - { - /* We should always have generated a cast to UINT. */ -- VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; - -@@ -6061,8 +6357,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - if (offset_node) - { - /* We should always have generated a cast to UINT. */ -- VKD3D_ASSERT(offset_node->data_type->class == HLSL_CLASS_SCALAR -- && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(hlsl_is_vec1(offset_node->data_type) && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT); - return false; - } -@@ -6097,11 +6392,14 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - { - const struct hlsl_ir_var *var = deref->var; - struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ unsigned int offset = 0; - - VKD3D_ASSERT(deref->data_type); - VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); - -+ if (!hlsl_type_is_patch_array(deref->var->data_type)) -+ offset = hlsl_offset_from_deref_safe(ctx, deref); -+ - ret.index += offset / 4; - ret.id += offset / 4; - -@@ -6112,6 +6410,36 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - return ret; - } - -+static bool get_integral_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, -+ unsigned int i, enum hlsl_base_type *base_type, int *value) -+{ -+ const struct hlsl_ir_node *instr = attr->args[i].node; -+ const struct hlsl_type *type = instr->data_type; -+ -+ if (type->class != HLSL_CLASS_SCALAR -+ || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, type))) -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Unexpected type for argument %u of [%s]: expected int or uint, but got %s.", -+ i, attr->name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (instr->type != HLSL_IR_CONSTANT) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [%s] initializer.", attr->name); -+ return false; -+ } -+ -+ *base_type = type->e.numeric.type; -+ *value = hlsl_ir_constant(instr)->value.u[0].i; -+ return true; -+} -+ - static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i) - { - const struct hlsl_ir_node *instr = attr->args[i].node; -@@ -6147,36 +6475,17 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - - for (i = 0; i < attr->args_count; ++i) - { -- const struct hlsl_ir_node *instr = attr->args[i].node; -- const struct hlsl_type *type = instr->data_type; -- const struct hlsl_ir_constant *constant; -+ enum hlsl_base_type base_type; -+ int value; - -- if (type->class != HLSL_CLASS_SCALAR -- || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) -- { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_type_to_string(ctx, type))) -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument %u of [numthreads]: expected int or uint, but got %s.", -- i, string->buffer); -- hlsl_release_string_buffer(ctx, string); -- break; -- } -- -- if (instr->type != HLSL_IR_CONSTANT) -- { -- hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [numthreads] initializer."); -- break; -- } -- constant = hlsl_ir_constant(instr); -+ if (!get_integral_argument_value(ctx, attr, i, &base_type, &value)) -+ return; - -- if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) -- || (type->e.numeric.type == HLSL_TYPE_UINT && !constant->value.u[0].u)) -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, -+ if ((base_type == HLSL_TYPE_INT && value <= 0) || (base_type == HLSL_TYPE_UINT && !value)) -+ hlsl_error(ctx, &attr->args[i].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, - "Thread count must be a positive integer."); - -- ctx->thread_count[i] = constant->value.u[0].u; -+ ctx->thread_count[i] = value; - } - } - -@@ -6208,9 +6517,8 @@ static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attri - - static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) - { -- const struct hlsl_ir_node *instr; -- const struct hlsl_type *type; -- const struct hlsl_ir_constant *constant; -+ enum hlsl_base_type base_type; -+ int value; - - if (attr->args_count != 1) - { -@@ -6219,35 +6527,14 @@ static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const stru - return; - } - -- instr = attr->args[0].node; -- type = instr->data_type; -- -- if (type->class != HLSL_CLASS_SCALAR -- || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) -- { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_type_to_string(ctx, type))) -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.", -- string->buffer); -- hlsl_release_string_buffer(ctx, string); -- return; -- } -- -- if (instr->type != HLSL_IR_CONSTANT) -- { -- hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer."); -+ if (!get_integral_argument_value(ctx, attr, 0, &base_type, &value)) - return; -- } -- constant = hlsl_ir_constant(instr); - -- if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0) -- || constant->value.u[0].u > 32) -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, -+ if (value < 0 || value > 32) -+ hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, - "Output control point count must be between 0 and 32."); - -- ctx->output_control_point_count = constant->value.u[0].u; -+ ctx->output_control_point_count = value; - } - - static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -@@ -6341,6 +6628,28 @@ static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct - "Patch constant function \"%s\" is not defined.", name); - } - -+static void parse_maxvertexcount_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -+{ -+ enum hlsl_base_type base_type; -+ int value; -+ -+ if (attr->args_count != 1) -+ { -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected 1 parameter for [maxvertexcount] attribute, but got %u.", attr->args_count); -+ return; -+ } -+ -+ if (!get_integral_argument_value(ctx, attr, 0, &base_type, &value)) -+ return; -+ -+ if (value < 1 || value > 1024) -+ hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT, -+ "Max vertex count must be between 1 and 1024."); -+ -+ ctx->max_vertex_count = value; -+} -+ - static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - const struct hlsl_profile_info *profile = ctx->profile; -@@ -6365,6 +6674,8 @@ static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir - parse_patchconstantfunc_attribute(ctx, attr); - else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL) - entry_func->early_depth_test = true; -+ else if (!strcmp(attr->name, "maxvertexcount") && profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ parse_maxvertexcount_attribute(ctx, attr); - else - hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, - "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); -@@ -6437,7 +6748,71 @@ static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct h - } - } - --static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) -+static enum vkd3d_primitive_type get_primitive_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) -+{ -+ uint32_t prim_modifier = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK; -+ enum vkd3d_primitive_type prim_type = VKD3D_PT_UNDEFINED; -+ -+ if (prim_modifier) -+ { -+ unsigned int count = var->data_type->e.array.elements_count; -+ unsigned int expected_count; -+ -+ VKD3D_ASSERT(!(prim_modifier & (prim_modifier - 1))); -+ -+ switch (prim_modifier) -+ { -+ case HLSL_PRIMITIVE_POINT: -+ prim_type = VKD3D_PT_POINTLIST; -+ expected_count = 1; -+ break; -+ -+ case HLSL_PRIMITIVE_LINE: -+ prim_type = VKD3D_PT_LINELIST; -+ expected_count = 2; -+ break; -+ -+ case HLSL_PRIMITIVE_TRIANGLE: -+ prim_type = VKD3D_PT_TRIANGLELIST; -+ expected_count = 3; -+ break; -+ -+ case HLSL_PRIMITIVE_LINEADJ: -+ prim_type = VKD3D_PT_LINELIST_ADJ; -+ expected_count = 4; -+ break; -+ -+ case HLSL_PRIMITIVE_TRIANGLEADJ: -+ prim_type = VKD3D_PT_TRIANGLELIST_ADJ; -+ expected_count = 6; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (count != expected_count) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, prim_modifier))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, -+ "Control point count %u does not match the expect count %u for the %s input primitive type.", -+ count, expected_count, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ } -+ -+ /* Patch types take precedence over primitive modifiers. */ -+ if (hlsl_type_is_patch_array(var->data_type)) -+ prim_type = VKD3D_PT_PATCH; -+ -+ VKD3D_ASSERT(prim_type != VKD3D_PT_UNDEFINED); -+ return prim_type; -+} -+ -+ -+static void validate_and_record_prim_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) - { - unsigned int control_point_count = var->data_type->e.array.elements_count; - enum hlsl_array_type array_type = var->data_type->e.array.array_type; -@@ -6455,7 +6830,7 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ - return; - } - } -- else -+ else if (array_type == HLSL_ARRAY_PATCH_OUTPUT) - { - if (!ctx->is_patch_constant_func && profile->type != VKD3D_SHADER_TYPE_DOMAIN) - { -@@ -6466,6 +6841,30 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ - } - } - -+ if ((var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK) && profile->type != VKD3D_SHADER_TYPE_GEOMETRY) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "Input primitive parameters can only be used in geometry shaders."); -+ return; -+ } -+ -+ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ { -+ enum vkd3d_primitive_type prim_type = get_primitive_type(ctx, var); -+ -+ if (ctx->input_primitive_type == VKD3D_PT_UNDEFINED) -+ { -+ ctx->input_primitive_type = prim_type; -+ } -+ else if (ctx->input_primitive_type != prim_type) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Input primitive type does not match the previously declared type."); -+ hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR, -+ "The input primitive was previously declared here."); -+ } -+ } -+ - if (control_point_count > 32) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, -@@ -6478,7 +6877,7 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ - { - if (control_point_count != ctx->output_control_point_count) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, -- "Output control point count %u does not match the count %u specified in the control point function.", -+ "Output control point count %u does not match the count %u declared in the control point function.", - control_point_count, ctx->output_control_point_count); - - if (!hlsl_types_are_equal(control_point_type, ctx->output_control_point_type)) -@@ -6490,22 +6889,32 @@ static void validate_and_record_patch_type(struct hlsl_ctx *ctx, struct hlsl_ir_ - - if (ctx->input_control_point_count != UINT_MAX) - { -- VKD3D_ASSERT(ctx->is_patch_constant_func); -+ VKD3D_ASSERT(profile->type == VKD3D_SHADER_TYPE_GEOMETRY || ctx->is_patch_constant_func); - - if (control_point_count != ctx->input_control_point_count) -+ { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, -- "Input control point count %u does not match the count %u specified in the control point function.", -+ "Input control point count %u does not match the count %u declared previously.", - control_point_count, ctx->input_control_point_count); -+ hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR, -+ "The input primitive was previously declared here."); -+ } - -- if (!hlsl_types_are_equal(control_point_type, ctx->input_control_point_type)) -+ if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY -+ && !hlsl_types_are_equal(control_point_type, ctx->input_control_point_type)) -+ { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Input control point type does not match the input type specified in the control point function."); -+ "Input control point type does not match the input type declared previously."); -+ hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR, -+ "The input primitive was previously declared here."); -+ } - - return; - } - - ctx->input_control_point_count = control_point_count; - ctx->input_control_point_type = control_point_type; -+ ctx->input_primitive_param = var; - } - - static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) -@@ -6569,6 +6978,24 @@ void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) - lower_ir(ctx, lower_index_loads, body); - } - -+ -+static bool simplify_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) -+{ -+ bool progress, any_progress = false; -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, block, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); -+ -+ any_progress |= progress; -+ } while (progress); -+ -+ return any_progress; -+} -+ - static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - bool progress; -@@ -6576,15 +7003,13 @@ static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *bod - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -+ progress = simplify_exprs(ctx, body); - progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); - } while (progress); -+ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - } - - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) -@@ -7214,7 +7639,7 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - } - else - { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -+ unsigned int offset = deref->const_offset + var->buffer_offset; - - VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; -@@ -7232,6 +7657,14 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - reg->idx[1].offset = offset / 4; - reg->idx_count = 2; - } -+ -+ if (deref->rel_offset.node) -+ { -+ if (!(reg->idx[reg->idx_count - 1].rel_addr = sm4_generate_vsir_new_idx_src(ctx, -+ program, deref->rel_offset.node))) -+ return false; -+ } -+ - *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3); - } - } -@@ -11515,9 +11948,7 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo - copy_propagation_pop_scope(state); - copy_propagation_push_scope(state, ctx); - -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); -+ progress = simplify_exprs(ctx, block); - - current_index = index_instructions(block, *index); - progress |= copy_propagation_transform_block(ctx, block, state); -@@ -12060,42 +12491,68 @@ static void process_entry_function(struct hlsl_ctx *ctx, - else - prepend_uniform_copy(ctx, body, var); - } -- else if (hlsl_type_is_patch_array(var->data_type)) -+ else if (hlsl_type_is_primitive_array(var->data_type)) - { -- if (var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT) -+ if (var->storage_modifiers & HLSL_STORAGE_OUT) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Input primitive parameter \"%s\" is declared as \"out\".", var->name); -+ -+ if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY) - { -- if (input_patch) -+ enum hlsl_array_type array_type = var->data_type->e.array.array_type; -+ -+ if (array_type == HLSL_ARRAY_PATCH_INPUT) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, -- "Found multiple InputPatch parameters."); -- hlsl_note(ctx, &input_patch->loc, VKD3D_SHADER_LOG_ERROR, -- "The InputPatch parameter was previously declared here."); -- continue; -+ if (input_patch) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, -+ "Found multiple InputPatch parameters."); -+ hlsl_note(ctx, &input_patch->loc, VKD3D_SHADER_LOG_ERROR, -+ "The InputPatch parameter was previously declared here."); -+ continue; -+ } -+ input_patch = var; - } -- input_patch = var; -- } -- else -- { -- if (output_patch) -+ else if (array_type == HLSL_ARRAY_PATCH_OUTPUT) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, -- "Found multiple OutputPatch parameters."); -- hlsl_note(ctx, &output_patch->loc, VKD3D_SHADER_LOG_ERROR, -- "The OutputPatch parameter was previously declared here."); -- continue; -+ if (output_patch) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH, -+ "Found multiple OutputPatch parameters."); -+ hlsl_note(ctx, &output_patch->loc, VKD3D_SHADER_LOG_ERROR, -+ "The OutputPatch parameter was previously declared here."); -+ continue; -+ } -+ output_patch = var; - } -- output_patch = var; - } - -- validate_and_record_patch_type(ctx, var); -+ validate_and_record_prim_type(ctx, var); - if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) - { -- hlsl_fixme(ctx, &var->loc, "InputPatch/OutputPatch parameters in geometry shaders."); -+ hlsl_fixme(ctx, &var->loc, "Input primitive parameters in geometry shaders."); - continue; - } - - prepend_input_var_copy(ctx, entry_func, var); - } -+ else if (hlsl_get_stream_output_type(var->data_type)) -+ { -+ if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "Stream output parameters can only be used in geometry shaders."); -+ continue; -+ } -+ -+ if (!(var->storage_modifiers & HLSL_STORAGE_IN) || !(var->storage_modifiers & HLSL_STORAGE_OUT)) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Stream output parameter \"%s\" must be declared as \"inout\".", var->name); -+ -+ /* TODO: check that maxvertexcount * component_count(element_type) <= 1024. */ -+ -+ continue; -+ } - else - { - if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT -@@ -12107,12 +12564,24 @@ static void process_entry_function(struct hlsl_ctx *ctx, - } - - if (var->storage_modifiers & HLSL_STORAGE_IN) -+ { -+ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE, -+ "Input parameter \"%s\" is missing a primitive type.", var->name); -+ continue; -+ } -+ - prepend_input_var_copy(ctx, entry_func, var); -+ } - if (var->storage_modifiers & HLSL_STORAGE_OUT) - { - if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Output parameters are not supported in hull shader control point functions."); -+ else if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Output parameters are not allowed in geometry shaders."); - else - append_output_var_copy(ctx, entry_func, var); - } -@@ -12120,7 +12589,11 @@ static void process_entry_function(struct hlsl_ctx *ctx, - } - if (entry_func->return_var) - { -- if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) -+ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "Geometry shaders cannot return values."); -+ else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT -+ && !entry_func->return_var->semantic.name) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - -@@ -12135,6 +12608,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, - hlsl_fixme(ctx, &entry_func->loc, "Passthrough hull shader control point function."); - } - -+ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && ctx->input_primitive_type == VKD3D_PT_UNDEFINED) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE, -+ "Entry point \"%s\" is missing an input primitive parameter.", entry_func->func->name); -+ - if (hlsl_version_ge(ctx, 4, 0)) - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -@@ -12188,6 +12665,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, - lower_ir(ctx, lower_casts_to_bool, body); - - lower_ir(ctx, lower_casts_to_int, body); -+ lower_ir(ctx, lower_trunc, body); - lower_ir(ctx, lower_sqrt, body); - lower_ir(ctx, lower_dot, body); - lower_ir(ctx, lower_round, body); -@@ -12217,7 +12695,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, - - /* TODO: move forward, remove when no longer needed */ - transform_derefs(ctx, replace_deref_path_with_offset, body); -- while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); -+ simplify_exprs(ctx, body); - transform_derefs(ctx, clean_constant_deref_offset_srcs, body); - - do -@@ -12253,6 +12731,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); -+ else if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && !ctx->max_vertex_count) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [maxvertexcount] attribute.", entry_func->func->name); - - hlsl_block_init(&global_uniform_block); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index f4715a9224c..f74ecffcd4b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -1712,7 +1712,8 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - progress = true; - } - -- if (!progress && e1 && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[1].node, arg2))) -+ if (!progress && e1 && e1->op == op -+ && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[1].node, arg2))) - { - /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */ - arg1 = e1->operands[0].node; -@@ -1720,7 +1721,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - progress = true; - } - -- if (!progress && is_op_commutative(op) && e1 -+ if (!progress && is_op_commutative(op) && e1 && e1->op == op - && (tmp = collect_exprs(ctx, &block, instr, op, e1->operands[0].node, arg2))) - { - /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */ -@@ -1729,7 +1730,8 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - progress = true; - } - -- if (!progress && e2 && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[0].node))) -+ if (!progress && e2 && e2->op == op -+ && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[0].node))) - { - /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */ - arg1 = tmp; -@@ -1737,7 +1739,7 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - progress = true; - } - -- if (!progress && is_op_commutative(op) && e2 -+ if (!progress && is_op_commutative(op) && e2 && e2->op == op - && (tmp = collect_exprs(ctx, &block, instr, op, arg1, e2->operands[1].node))) - { - /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 267f0884d83..f4525009f77 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -3462,12 +3462,16 @@ static void sm4_write_register_index(const struct tpf_compiler *tpf, const struc - unsigned int j) - { - unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); -+ const struct vkd3d_shader_register_index *idx = ®->idx[j]; - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - unsigned int k; - -+ if (!addressing || (addressing & VKD3D_SM4_ADDRESSING_OFFSET)) -+ put_u32(buffer, idx->offset); -+ - if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) - { -- const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; -+ const struct vkd3d_shader_src_param *idx_src = idx->rel_addr; - uint32_t idx_src_token; - - VKD3D_ASSERT(idx_src); -@@ -3482,10 +3486,6 @@ static void sm4_write_register_index(const struct tpf_compiler *tpf, const struc - VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); - } - } -- else -- { -- put_u32(tpf->buffer, reg->idx[j].offset); -- } - } - - static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index eb50da28d24..8329bf169b0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -169,6 +169,9 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, - VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040, - VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH = 5041, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT = 5042, -+ VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE = 5043, -+ VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT = 5044, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, --- -2.47.2 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-cbce3a8631116ec10895e6c9c4a00b89b05.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-cbce3a8631116ec10895e6c9c4a00b89b05.patch new file mode 100644 index 00000000..68e3bf36 --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-cbce3a8631116ec10895e6c9c4a00b89b05.patch @@ -0,0 +1,214 @@ +From d682beb0dff9eff34d01f2e517145d17d561ef01 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 10 Apr 2025 07:44:42 +1000 +Subject: [PATCH 2/2] Updated vkd3d to + cbce3a8631116ec10895e6c9c4a00b89b051f6b0. + +--- + libs/vkd3d/libs/vkd3d-shader/fx.c | 44 ++++++++++++++++----- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 10 ++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 20 +++++++--- + 4 files changed, 58 insertions(+), 18 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index debcb261811..c93f01039ef 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -2420,6 +2420,23 @@ static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_com + } + } + ++static inline bool hlsl_type_state_compatible(struct hlsl_type *lhs, enum hlsl_base_type rhs) ++{ ++ if (!hlsl_is_numeric_type(lhs)) ++ return false; ++ switch (lhs->e.numeric.type) ++ { ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return rhs == HLSL_TYPE_INT || rhs == HLSL_TYPE_UINT; ++ ++ default: ++ return lhs->e.numeric.type == rhs; ++ } ++ ++ vkd3d_unreachable(); ++} ++ + static const struct rhs_named_value filter_values[] = + { + { "MIN_MAG_MIP_POINT", 0x00 }, +@@ -2664,9 +2681,9 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + struct replace_state_context replace_context; + const struct fx_4_state *state = NULL; + struct hlsl_type *state_type = NULL; +- struct hlsl_ir_node *node, *cast; + struct hlsl_ctx *ctx = fx->ctx; + enum hlsl_base_type base_type; ++ struct hlsl_ir_node *node; + unsigned int i; + + if (type->class == HLSL_CLASS_BLEND_STATE && ctx->profile->major_version == 5) +@@ -2803,9 +2820,15 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + if (state_type) + { + node = entry->args->node; +- if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) +- return; +- list_add_after(&node->entry, &cast->entry); ++ if (state->type == FX_UINT8 || !hlsl_type_state_compatible(node->data_type, base_type)) ++ { ++ struct hlsl_ir_node *cast; ++ ++ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) ++ return; ++ list_add_after(&node->entry, &cast->entry); ++ node = cast; ++ } + + /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. */ + if (state->type == FX_UINT8) +@@ -2814,15 +2837,18 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + + if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) + return; +- list_add_after(&cast->entry, &mask->entry); ++ list_add_after(&node->entry, &mask->entry); + +- if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) ++ if (!(node = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, node, mask))) + return; +- list_add_after(&mask->entry, &cast->entry); ++ list_add_after(&mask->entry, &node->entry); + } + +- hlsl_src_remove(entry->args); +- hlsl_src_from_node(entry->args, cast); ++ if (node != entry->args->node) ++ { ++ hlsl_src_remove(entry->args); ++ hlsl_src_from_node(entry->args, node); ++ } + + hlsl_run_const_passes(ctx, entry->instrs); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index ba56ba90403..dc7607a1393 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -6407,7 +6407,9 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) + vip_allocation = true; + +- if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) ++ if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX ++ || (version.type == VKD3D_SHADER_TYPE_DOMAIN && !output && !is_primitive) ++ || (ctx->is_patch_constant_func && output)) + special_interpolation = true; + } + +@@ -6443,6 +6445,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; + struct hlsl_ir_var *var; + ++ in_prim_allocator.prioritize_smaller_writemasks = true; ++ patch_constant_out_patch_allocator.prioritize_smaller_writemasks = true; + input_allocator.prioritize_smaller_writemasks = true; + output_allocator.prioritize_smaller_writemasks = true; + +@@ -6470,6 +6474,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader); + } + ++ vkd3d_free(in_prim_allocator.allocations); ++ vkd3d_free(patch_constant_out_patch_allocator.allocations); + vkd3d_free(input_allocator.allocations); + vkd3d_free(output_allocator.allocations); + } +@@ -9770,7 +9776,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + else + { + if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL +- || version->type == VKD3D_SHADER_TYPE_HULL) ++ || (version->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func)) + opcode = VKD3DSIH_DCL_OUTPUT; + else + opcode = VKD3DSIH_DCL_OUTPUT_SIV; +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index a5d952cd525..d477bfa1c1b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -1292,7 +1292,7 @@ static int msl_generator_init(struct msl_generator *gen, struct vsir_program *pr + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); +- return VKD3D_ERROR_INVALID_SHADER; ++ gen->prefix = "unknown"; + } + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 23dab35a288..3be1d743acf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -3116,8 +3116,12 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, + {"sv_position", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, ++ {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, ++ {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, + + {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, ++ {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, ++ {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + {"sv_gsinstanceid", false, VKD3D_SHADER_TYPE_GEOMETRY, ~0u}, +@@ -3131,6 +3135,8 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + + {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, ++ {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, ++ {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + + {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, +@@ -3164,6 +3170,10 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + if (!ascii_strcasecmp(semantic_name, "sv_position") + || (semantic_compat_mapping && !ascii_strcasecmp(semantic_name, "position"))) + *sysval_semantic = VKD3D_SHADER_SV_POSITION; ++ else if (!ascii_strcasecmp(semantic_name, "sv_rendertargetarrayindex")) ++ *sysval_semantic = VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX; ++ else if (!ascii_strcasecmp(semantic_name, "sv_viewportarrayindex")) ++ *sysval_semantic = VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX; + else if (has_sv_prefix) + return false; + else +@@ -3179,11 +3189,6 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); + if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) + return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); +- if (!ascii_strcasecmp(semantic_name, "sv_position")) +- { +- *sysval_semantic = VKD3D_SHADER_SV_NONE; +- return true; +- } + } + else + { +@@ -3214,7 +3219,10 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + && (semantic_compat_mapping || has_sv_prefix) + && version->type == semantics[i].shader_type) + { +- *sysval_semantic = semantics[i].semantic; ++ if (is_patch_constant_func && output && semantics[i].semantic != ~0u) ++ *sysval_semantic = VKD3D_SHADER_SV_NONE; ++ else ++ *sysval_semantic = semantics[i].semantic; + return true; + } + } +-- +2.47.2 + diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch deleted file mode 100644 index f60d27b6..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch +++ /dev/null @@ -1,831 +0,0 @@ -From 6d98fd430eead31dfd3e0c71e883d5225b0e1876 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 18 Mar 2025 08:53:16 +1100 -Subject: [PATCH] Updated vkd3d to e418bbcfac74230dfbf0c49b72c4f059cb1afc00. - ---- - libs/vkd3d/libs/vkd3d-shader/fx.c | 415 ++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 3 + - libs/vkd3d/libs/vkd3d-shader/ir.c | 29 ++ - libs/vkd3d/libs/vkd3d-shader/spirv.c | 41 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 54 +++ - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - 6 files changed, 423 insertions(+), 120 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 4fbce393707..914c365b8f6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -25,6 +25,14 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin - return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); - } - -+enum fx_2_type_constants -+{ -+ /* Assignment types */ -+ FX_2_ASSIGNMENT_CODE_BLOB = 0x0, -+ FX_2_ASSIGNMENT_PARAMETER = 0x1, -+ FX_2_ASSIGNMENT_ARRAY_SELECTOR = 0x2, -+}; -+ - enum state_property_component_type - { - FX_BOOL, -@@ -281,6 +289,15 @@ static void set_status(struct fx_write_context *fx, int status) - fx->status = status; - } - -+static void fx_print_string(struct vkd3d_string_buffer *buffer, const char *prefix, -+ const char *s, size_t len) -+{ -+ if (len) -+ --len; /* Trim terminating null. */ -+ vkd3d_string_buffer_printf(buffer, "%s", prefix); -+ vkd3d_string_buffer_print_string_escaped(buffer, s, len); -+} -+ - static uint32_t write_string(const char *string, struct fx_write_context *fx) - { - return fx->ops->write_string(string, fx); -@@ -725,6 +742,28 @@ static const struct rhs_named_value fx_2_lighttype_values[] = - { NULL } - }; - -+static const struct rhs_named_value fx_2_address_values[] = -+{ -+ { "WRAP", 1 }, -+ { "MIRROR", 2 }, -+ { "CLAMP", 3 }, -+ { "BORDER", 4 }, -+ { "MIRROR_ONCE", 5 }, -+ { NULL } -+}; -+ -+static const struct rhs_named_value fx_2_filter_values[] = -+{ -+ { "NONE", 0 }, -+ { "POINT", 1 }, -+ { "LINEAR", 2 }, -+ { "ANISOTROPIC", 3 }, -+ { "PYRAMIDALQUAD", 6 }, -+ { "GAUSSIANQUAD", 7 }, -+ { "CONVOLUTIONMONO", 8 }, -+ { NULL } -+}; -+ - static const struct fx_2_state - { - const char *name; -@@ -895,6 +934,38 @@ fx_2_states[] = - - { "VertexShader", HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 }, - { "PixelShader", HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 147 }, -+ -+ { "VertexShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 148 }, -+ { "VertexShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u-1, 149 }, -+ { "VertexShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u-1, 150 }, -+ { "VertexShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 151 }, -+ { "VertexShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 152 }, -+ { "VertexShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 153 }, -+ { "VertexShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 154 }, -+ { "VertexShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 155 }, -+ -+ { "PixelShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 156 }, -+ { "PixelShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u-1, 157 }, -+ { "PixelShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u-1, 158 }, -+ { "PixelShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 159 }, -+ { "PixelShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 160 }, -+ { "PixelShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 161 }, -+ { "PixelShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 162 }, -+ { "PixelShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u-1, 163 }, -+ -+ { "Texture", HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 }, -+ { "AddressU", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 165, fx_2_address_values }, -+ { "AddressV", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 166, fx_2_address_values }, -+ { "AddressW", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 167, fx_2_address_values }, -+ { "BorderColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 168 }, -+ { "MagFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 169, fx_2_filter_values }, -+ { "MinFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 170, fx_2_filter_values }, -+ { "MipFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 171, fx_2_filter_values }, -+ { "MipMapLodBias", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 172 }, -+ { "MaxMipLevel", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 173 }, -+ { "MaxAnisotropy", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 174 }, -+ { "SRBTexture", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 175 }, -+ { "ElementIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 176 }, - }; - - static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -3452,13 +3523,12 @@ static void parse_fx_print_indent(struct fx_parser *parser) - vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); - } - --static const char *fx_2_get_string(struct fx_parser *parser, uint32_t offset) -+static const char *fx_2_get_string(struct fx_parser *parser, uint32_t offset, uint32_t *size) - { - const char *ptr; -- uint32_t size; - -- fx_parser_read_unstructured(parser, &size, offset, sizeof(size)); -- ptr = fx_parser_get_unstructured_ptr(parser, offset + 4, size); -+ fx_parser_read_unstructured(parser, size, offset, sizeof(*size)); -+ ptr = fx_parser_get_unstructured_ptr(parser, offset + 4, *size); - - if (!ptr) - { -@@ -3624,13 +3694,14 @@ static void fx_parse_fx_2_parameter(struct fx_parser *parser, uint32_t offset) - uint32_t element_count; - } var; - const char *name; -+ uint32_t size; - - fx_parser_read_unstructured(parser, &var, offset, sizeof(var)); - - fx_parse_fx_2_type(parser, offset); - -- name = fx_2_get_string(parser, var.name); -- vkd3d_string_buffer_printf(&parser->buffer, " %s", name); -+ name = fx_2_get_string(parser, var.name, &size); -+ fx_print_string(&parser->buffer, " ", name, size); - if (var.element_count) - vkd3d_string_buffer_printf(&parser->buffer, "[%u]", var.element_count); - } -@@ -3644,6 +3715,40 @@ static bool is_fx_2_sampler(uint32_t type) - || type == D3DXPT_SAMPLERCUBE; - } - -+static void fx_parse_fx_2_assignment(struct fx_parser *parser, const struct fx_assignment *entry); -+ -+static void parse_fx_2_sampler(struct fx_parser *parser, uint32_t element_count, -+ uint32_t offset) -+{ -+ struct fx_assignment entry; -+ uint32_t count; -+ -+ element_count = max(element_count, 1); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ for (uint32_t i = 0; i < element_count; ++i) -+ { -+ fx_parser_read_unstructured(parser, &count, offset, sizeof(count)); -+ offset += sizeof(count); -+ -+ parse_fx_start_indent(parser); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -+ parse_fx_start_indent(parser); -+ for (uint32_t j = 0; j < count; ++j, offset += sizeof(entry)) -+ { -+ fx_parser_read_unstructured(parser, &entry, offset, sizeof(entry)); -+ -+ parse_fx_print_indent(parser); -+ fx_parse_fx_2_assignment(parser, &entry); -+ } -+ parse_fx_end_indent(parser); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "},\n"); -+ parse_fx_end_indent(parser); -+ } -+} -+ - static void fx_parse_fx_2_initial_value(struct fx_parser *parser, uint32_t param, uint32_t value) - { - struct fx_2_var -@@ -3672,8 +3777,7 @@ static void fx_parse_fx_2_initial_value(struct fx_parser *parser, uint32_t param - if (var.class == D3DXPC_OBJECT) - { - if (is_fx_2_sampler(var.type)) -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -- "Parsing sampler initializers is not supported."); -+ parse_fx_2_sampler(parser, var.element_count, value); - else - parse_fx_2_object_value(parser, var.element_count, var.type, value); - } -@@ -3714,24 +3818,22 @@ static void fx_parse_fx_2_annotations(struct fx_parser *parser, uint32_t count) - vkd3d_string_buffer_printf(&parser->buffer, ">"); - } - --static void fx_parse_fx_2_assignment(struct fx_parser *parser) -+static void fx_parse_fx_2_assignment(struct fx_parser *parser, const struct fx_assignment *entry) - { - const struct rhs_named_value *named_value = NULL; - const struct fx_2_state *state = NULL; -- struct fx_assignment entry; - -- fx_parser_read_u32s(parser, &entry, sizeof(entry)); -- if (entry.id <= ARRAY_SIZE(fx_2_states)) -+ if (entry->id <= ARRAY_SIZE(fx_2_states)) - { -- state = &fx_2_states[entry.id]; -+ state = &fx_2_states[entry->id]; - - vkd3d_string_buffer_printf(&parser->buffer, "%s", state->name); - if (state->array_size > 1) -- vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry.lhs_index); -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry->lhs_index); - } - else - { -- vkd3d_string_buffer_printf(&parser->buffer, "", entry.id); -+ vkd3d_string_buffer_printf(&parser->buffer, "", entry->id); - } - vkd3d_string_buffer_printf(&parser->buffer, " = "); - -@@ -3740,7 +3842,7 @@ static void fx_parse_fx_2_assignment(struct fx_parser *parser) - const struct rhs_named_value *ptr = state->values; - uint32_t value; - -- fx_parser_read_unstructured(parser, &value, entry.value, sizeof(value)); -+ fx_parser_read_unstructured(parser, &value, entry->value, sizeof(value)); - - while (ptr->name) - { -@@ -3757,13 +3859,27 @@ static void fx_parse_fx_2_assignment(struct fx_parser *parser) - { - vkd3d_string_buffer_printf(&parser->buffer, "%s /* %u */", named_value->name, named_value->value); - } -- else if (state && (state->type == FX_UINT || state->type == FX_FLOAT)) -+ else if (state) - { -- uint32_t offset = entry.type; -- unsigned int size; -+ if (state->type == FX_UINT || state->type == FX_FLOAT) -+ { -+ uint32_t offset = entry->type; -+ unsigned int size; - -- size = fx_get_fx_2_type_size(parser, &offset); -- parse_fx_2_numeric_value(parser, entry.value, size, entry.type); -+ size = fx_get_fx_2_type_size(parser, &offset); -+ parse_fx_2_numeric_value(parser, entry->value, size, entry->type); -+ } -+ else if (state->type == FX_VERTEXSHADER || state->type == FX_PIXELSHADER) -+ { -+ uint32_t id; -+ -+ fx_parser_read_unstructured(parser, &id, entry->value, sizeof(id)); -+ vkd3d_string_buffer_printf(&parser->buffer, "", id); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, ""); -+ } - } - else - { -@@ -3787,16 +3903,17 @@ static void fx_parse_fx_2_technique(struct fx_parser *parser) - uint32_t assignment_count; - } pass; - const char *name; -+ uint32_t size; - - if (parser->failed) - return; - - fx_parser_read_u32s(parser, &technique, sizeof(technique)); - -- name = fx_2_get_string(parser, technique.name); -+ name = fx_2_get_string(parser, technique.name, &size); - - parse_fx_print_indent(parser); -- vkd3d_string_buffer_printf(&parser->buffer, "technique %s", name); -+ fx_print_string(&parser->buffer, "technique ", name, size); - fx_parse_fx_2_annotations(parser, technique.annotation_count); - - vkd3d_string_buffer_printf(&parser->buffer, "\n"); -@@ -3807,10 +3924,10 @@ static void fx_parse_fx_2_technique(struct fx_parser *parser) - for (uint32_t i = 0; i < technique.pass_count; ++i) - { - fx_parser_read_u32s(parser, &pass, sizeof(pass)); -- name = fx_2_get_string(parser, pass.name); -+ name = fx_2_get_string(parser, pass.name, &size); - - parse_fx_print_indent(parser); -- vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name); -+ fx_print_string(&parser->buffer, "pass ", name, size); - fx_parse_fx_2_annotations(parser, pass.annotation_count); - - vkd3d_string_buffer_printf(&parser->buffer, "\n"); -@@ -3820,8 +3937,11 @@ static void fx_parse_fx_2_technique(struct fx_parser *parser) - parse_fx_start_indent(parser); - for (uint32_t j = 0; j < pass.assignment_count; ++j) - { -+ struct fx_assignment entry; -+ - parse_fx_print_indent(parser); -- fx_parse_fx_2_assignment(parser); -+ fx_parser_read_u32s(parser, &entry, sizeof(entry)); -+ fx_parse_fx_2_assignment(parser, &entry); - } - parse_fx_end_indent(parser); - -@@ -3858,10 +3978,62 @@ static void fx_2_parse_parameters(struct fx_parser *parser, uint32_t count) - vkd3d_string_buffer_printf(&parser->buffer, "\n"); - } - -+static void fx_parse_shader_blob(struct fx_parser *parser, enum vkd3d_shader_source_type source_type, -+ const void *data, uint32_t data_size) -+{ -+ struct vkd3d_shader_compile_info info = { 0 }; -+ struct vkd3d_shader_code output; -+ const char *p, *q, *end; -+ int ret; -+ -+ static const struct vkd3d_shader_compile_option options[] = -+ { -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, -+ }; -+ -+ info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; -+ info.source.code = data; -+ info.source.size = data_size; -+ info.source_type = source_type; -+ info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; -+ info.options = options; -+ info.option_count = ARRAY_SIZE(options); -+ info.log_level = VKD3D_SHADER_LOG_INFO; -+ -+ if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Failed to disassemble shader blob."); -+ return; -+ } -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); -+ -+ parse_fx_start_indent(parser); -+ -+ end = (const char *)output.code + output.size; -+ for (p = output.code; p < end; p = q) -+ { -+ if (!(q = memchr(p, '\n', end - p))) -+ q = end; -+ else -+ ++q; -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); -+ } -+ -+ parse_fx_end_indent(parser); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}"); -+ -+ vkd3d_shader_free_shader_code(&output); -+} -+ - static void fx_parse_fx_2_data_blob(struct fx_parser *parser) - { - uint32_t id, size; -- const char *str; -+ const void *data; - - id = fx_parser_read_u32(parser); - size = fx_parser_read_u32(parser); -@@ -3882,13 +4054,24 @@ static void fx_parse_fx_2_data_blob(struct fx_parser *parser) - case D3DXPT_VERTEXSHADER: - vkd3d_string_buffer_printf(&parser->buffer, "%s object %u size %u bytes%s\n", - fx_2_types[type], id, size, size ? ":" : ","); -- if (size && type == D3DXPT_STRING) -+ -+ if (size) - { -- parse_fx_start_indent(parser); -- parse_fx_print_indent(parser); -- str = fx_parser_get_ptr(parser, size); -- vkd3d_string_buffer_printf(&parser->buffer, "\"%.*s\"\n", size, str); -- parse_fx_end_indent(parser); -+ data = fx_parser_get_ptr(parser, size); -+ -+ if (type == D3DXPT_STRING) -+ { -+ parse_fx_start_indent(parser); -+ parse_fx_print_indent(parser); -+ fx_print_string(&parser->buffer, "\"", (const char *)data, size); -+ vkd3d_string_buffer_printf(&parser->buffer, "\""); -+ parse_fx_end_indent(parser); -+ } -+ else if (type == D3DXPT_PIXELSHADER || type == D3DXPT_VERTEXSHADER) -+ { -+ fx_parse_shader_blob(parser, VKD3D_SHADER_SOURCE_D3D_BYTECODE, data, size); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); - } - break; - default: -@@ -3903,9 +4086,117 @@ static void fx_parse_fx_2_data_blob(struct fx_parser *parser) - fx_parser_skip(parser, align(size, 4)); - } - -+static void fx_dump_blob(struct fx_parser *parser, const void *blob, uint32_t size) -+{ -+ const uint32_t *data = blob; -+ unsigned int i, j, n; -+ -+ size /= sizeof(*data); -+ i = 0; -+ while (i < size) -+ { -+ parse_fx_print_indent(parser); -+ n = min(size - i, 8); -+ for (j = 0; j < n; ++j) -+ vkd3d_string_buffer_printf(&parser->buffer, "0x%08x,", data[i + j]); -+ i += n; -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ } -+} -+ -+static void fx_parse_fx_2_array_selector(struct fx_parser *parser, uint32_t size) -+{ -+ const uint8_t *end = parser->ptr + size; -+ uint32_t name_size, blob_size = 0; -+ const void *blob = NULL; -+ const char *name; -+ -+ name_size = fx_parser_read_u32(parser); -+ name = fx_parser_get_ptr(parser, name_size); -+ fx_parser_skip(parser, name_size); -+ -+ if (!name || (uint8_t *)name >= end) -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Malformed name entry in the array selector."); -+ -+ if (parser->ptr <= end) -+ { -+ blob_size = end - parser->ptr; -+ blob = fx_parser_get_ptr(parser, blob_size); -+ fx_parser_skip(parser, blob_size); -+ } -+ else -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Malformed blob entry in the array selector."); -+ } -+ -+ if (name) -+ { -+ fx_print_string(&parser->buffer, "array \"", name, name_size); -+ vkd3d_string_buffer_printf(&parser->buffer, "\"\n"); -+ } -+ if (blob) -+ { -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "selector blob size %u\n", blob_size); -+ fx_dump_blob(parser, blob, blob_size); -+ } -+} -+ -+static void fx_parse_fx_2_complex_state(struct fx_parser *parser) -+{ -+ struct -+ { -+ uint32_t technique; -+ uint32_t index; -+ uint32_t element; -+ uint32_t state; -+ uint32_t assignment_type; -+ } state; -+ const char *data; -+ uint32_t size; -+ -+ fx_parser_read_u32s(parser, &state, sizeof(state)); -+ -+ if (state.technique == ~0u) -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "parameter %u[%u], state %u =\n", -+ state.index, state.element, state.state); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "technique %u, pass %u, state %u =\n", -+ state.technique, state.index, state.state); -+ } -+ -+ size = fx_parser_read_u32(parser); -+ -+ parse_fx_print_indent(parser); -+ -+ if (state.assignment_type == FX_2_ASSIGNMENT_PARAMETER) -+ { -+ data = fx_parser_get_ptr(parser, size); -+ fx_print_string(&parser->buffer, "parameter \"", data, size); -+ vkd3d_string_buffer_printf(&parser->buffer, "\"\n"); -+ fx_parser_skip(parser, align(size, 4)); -+ } -+ else if (state.assignment_type == FX_2_ASSIGNMENT_ARRAY_SELECTOR) -+ { -+ fx_parse_fx_2_array_selector(parser, size); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "blob size %u\n", size); -+ data = fx_parser_get_ptr(parser, size); -+ fx_dump_blob(parser, data, size); -+ fx_parser_skip(parser, align(size, 4)); -+ } -+} -+ - static void fx_2_parse(struct fx_parser *parser) - { -- uint32_t i, size, parameter_count, technique_count, blob_count; -+ uint32_t i, size, parameter_count, technique_count, blob_count, state_count; - - fx_parser_skip(parser, sizeof(uint32_t)); /* Version */ - size = fx_parser_read_u32(parser); -@@ -3931,13 +4222,20 @@ static void fx_2_parse(struct fx_parser *parser) - fx_parse_fx_2_technique(parser); - - blob_count = fx_parser_read_u32(parser); -- fx_parser_read_u32(parser); /* Resource count */ -+ state_count = fx_parser_read_u32(parser); - - vkd3d_string_buffer_printf(&parser->buffer, "object data {\n"); - parse_fx_start_indent(parser); - for (i = 0; i < blob_count; ++i) - fx_parse_fx_2_data_blob(parser); - parse_fx_end_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "state data {\n"); -+ parse_fx_start_indent(parser); -+ for (i = 0; i < state_count; ++i) -+ fx_parse_fx_2_complex_state(parser); -+ parse_fx_end_indent(parser); - vkd3d_string_buffer_printf(&parser->buffer, "}\n"); - } - -@@ -4157,17 +4455,8 @@ static void fx_parse_buffers(struct fx_parser *parser) - - static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) - { -- struct vkd3d_shader_compile_info info = { 0 }; -- struct vkd3d_shader_code output; - const void *data = NULL; -- const char *p, *q, *end; - uint32_t data_size; -- int ret; -- -- static const struct vkd3d_shader_compile_option options[] = -- { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_15}, -- }; - - if (!shader->offset) - { -@@ -4183,42 +4472,8 @@ static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object - if (!data) - return; - -- info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; -- info.source.code = data; -- info.source.size = data_size; -- info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; -- info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; -- info.options = options; -- info.option_count = ARRAY_SIZE(options); -- info.log_level = VKD3D_SHADER_LOG_INFO; -- -- if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) -- { -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -- "Failed to disassemble shader blob."); -- return; -- } -- parse_fx_print_indent(parser); -- vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); -- -- parse_fx_start_indent(parser); -- -- end = (const char *)output.code + output.size; -- for (p = output.code; p < end; p = q) -- { -- if (!(q = memchr(p, '\n', end - p))) -- q = end; -- else -- ++q; -- -- parse_fx_print_indent(parser); -- vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); -- } -- -- parse_fx_end_indent(parser); -+ fx_parse_shader_blob(parser, VKD3D_SHADER_SOURCE_DXBC_TPF, data, data_size); - -- parse_fx_print_indent(parser); -- vkd3d_string_buffer_printf(&parser->buffer, "}"); - if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) - { - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", -@@ -4235,8 +4490,6 @@ static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object - if (shader->sodecl_count) - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); - } -- -- vkd3d_shader_free_shader_code(&output); - } - - static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index a47c2feb094..801de6a5954 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -1657,6 +1657,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_SWITCH: - shader_glsl_switch(gen, ins); - break; -+ case VKD3DSIH_XOR: -+ shader_glsl_binop(gen, ins, "^"); -+ break; - default: - shader_glsl_unhandled(gen, ins); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 41aa99fbc09..2e3040f038e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -7706,6 +7706,29 @@ static void vsir_validate_label_register(struct validation_context *ctx, - reg->idx[0].offset, ctx->program->block_count); - } - -+static void vsir_validate_constbuffer_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a CONSTBUFFER register.", reg->precision); -+ -+ if (reg->dimension != VSIR_DIMENSION_VEC4) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a CONSTBUFFER register.", reg->dimension); -+ -+ if (reg->idx_count != 3) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a CONSTBUFFER register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a CONSTBUFFER register ID."); -+} -+ - static void vsir_validate_sampler_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) - { -@@ -7943,6 +7966,10 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_register_without_indices(ctx, reg); - break; - -+ case VKD3DSPR_CONSTBUFFER: -+ vsir_validate_constbuffer_register(ctx, reg); -+ break; -+ - case VKD3DSPR_PRIMID: - vsir_validate_register_without_indices(ctx, reg); - break; -@@ -8130,6 +8157,8 @@ static void vsir_validate_dst_param(struct validation_context *ctx, - - case VKD3DSPR_IMMCONST: - case VKD3DSPR_IMMCONST64: -+ case VKD3DSPR_CONSTBUFFER: -+ case VKD3DSPR_IMMCONSTBUFFER: - case VKD3DSPR_SAMPLER: - case VKD3DSPR_RESOURCE: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 66a4a274f17..45140d44595 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -348,49 +348,12 @@ static void spirv_parser_print_instruction_offset(struct spirv_parser *parser, - parser->colours.comment, offset * sizeof(uint32_t), parser->colours.reset, suffix); - } - --static char get_escape_char(char c) --{ -- switch (c) -- { -- case '"': -- case '\\': -- return c; -- case '\t': -- return 't'; -- case '\n': -- return 'n'; -- case '\v': -- return 'v'; -- case '\f': -- return 'f'; -- case '\r': -- return 'r'; -- default: -- return 0; -- } --} -- - static void spirv_parser_print_string_literal(struct spirv_parser *parser, struct vkd3d_string_buffer *buffer, - const char *prefix, const char *s, size_t len, const char *suffix) - { -- size_t start, i; -- char c; -- - vkd3d_string_buffer_printf(buffer, "%s\"%s", prefix, parser->colours.literal); -- for (i = 0, start = 0; i < len; ++i) -- { -- if ((c = get_escape_char(s[i]))) -- { -- vkd3d_string_buffer_printf(buffer, "%.*s\\%c", (int)(i - start), &s[start], c); -- start = i + 1; -- } -- else if (!isprint(s[i])) -- { -- vkd3d_string_buffer_printf(buffer, "%.*s\\%03o", (int)(i - start), &s[start], (uint8_t)s[i]); -- start = i + 1; -- } -- } -- vkd3d_string_buffer_printf(buffer, "%.*s%s\"%s", (int)(len - start), &s[start], parser->colours.reset, suffix); -+ vkd3d_string_buffer_print_string_escaped(buffer, s, len); -+ vkd3d_string_buffer_printf(buffer, "%s\"%s", parser->colours.reset, suffix); - } - - static const struct spirv_parser_enumerant *spirv_parser_get_enumerant( -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index c990b496545..2a66cbdb1be 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -163,6 +163,60 @@ int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d) - return ret; - } - -+static char get_escape_char(char c) -+{ -+ switch (c) -+ { -+ case '"': -+ case '\\': -+ return c; -+ case '\t': -+ return 't'; -+ case '\n': -+ return 'n'; -+ case '\v': -+ return 'v'; -+ case '\f': -+ return 'f'; -+ case '\r': -+ return 'r'; -+ default: -+ return 0; -+ } -+} -+ -+int vkd3d_string_buffer_print_string_escaped(struct vkd3d_string_buffer *buffer, const char *s, size_t len) -+{ -+ size_t content_size, start, i; -+ int ret; -+ char c; -+ -+ content_size = buffer->content_size; -+ for (i = 0, start = 0; i < len; ++i) -+ { -+ if ((c = get_escape_char(s[i]))) -+ { -+ if ((ret = vkd3d_string_buffer_printf(buffer, "%.*s\\%c", (int)(i - start), &s[start], c)) < 0) -+ goto fail; -+ start = i + 1; -+ } -+ else if (!isprint(s[i])) -+ { -+ if ((ret = vkd3d_string_buffer_printf(buffer, "%.*s\\%03o", -+ (int)(i - start), &s[start], (uint8_t)s[i])) < 0) -+ goto fail; -+ start = i + 1; -+ } -+ } -+ if ((ret = vkd3d_string_buffer_printf(buffer, "%.*s", (int)(len - start), &s[start])) < 0) -+ goto fail; -+ return ret; -+ -+fail: -+ buffer->content_size = content_size; -+ return ret; -+} -+ - void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function) - { - vkd3d_shader_trace_text_(buffer->buffer, buffer->content_size, function); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 8329bf169b0..3a1b8d8bb64 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1565,6 +1565,7 @@ void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); - void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); - int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); - int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); -+int vkd3d_string_buffer_print_string_escaped(struct vkd3d_string_buffer *buffer, const char *s, size_t len); - int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) VKD3D_PRINTF_FUNC(2, 3); - void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct vkd3d_string_buffer *buffer); - #define vkd3d_string_buffer_trace(buffer) \ --- -2.47.2 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch deleted file mode 100644 index 1d0dd619..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch +++ /dev/null @@ -1,1534 +0,0 @@ -From 19e486ddd73608536af85ffd79aafb2b99a52bc2 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 20 Mar 2025 06:34:08 +1100 -Subject: [PATCH] Updated vkd3d to 110edf32d0b2a2f0a49cdd76c977b9eedd06628e. - ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 1 + - libs/vkd3d/libs/vkd3d-shader/dxil.c | 4 + - libs/vkd3d/libs/vkd3d-shader/fx.c | 6 + - libs/vkd3d/libs/vkd3d-shader/glsl.c | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 236 ++++++++++++------ - libs/vkd3d/libs/vkd3d-shader/ir.c | 1 + - libs/vkd3d/libs/vkd3d-shader/msl.c | 14 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 73 ++---- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 93 ++++++- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 51 ++-- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 57 +++-- - 12 files changed, 346 insertions(+), 203 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 7b69535a445..764f0888490 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -2104,6 +2104,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic - { - switch (semantic) - { -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: return "primID"; - case VKD3D_SHADER_SV_DEPTH: return "oDepth"; - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; - case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 29e714ee2ac..ac4828d6f59 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -9963,6 +9963,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s - { - input_primitive = VKD3D_PT_PATCH; - patch_vertex_count = i - INPUT_PRIMITIVE_PATCH1 + 1; -+ input_control_point_count = patch_vertex_count; - break; - } - -@@ -9973,6 +9974,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s - } - - sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); -+ sm6->p.program->input_primitive = input_primitive; - sm6->p.program->input_control_point_count = input_control_point_count; - - i = operands[1]; -@@ -9984,6 +9986,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s - "Geometry shader output vertex count %u is invalid.", i); - } - sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_VERTICES_OUT, i); -+ sm6->p.program->vertices_out_count = i; - - if (operands[2] > 1) - { -@@ -10001,6 +10004,7 @@ static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct s - output_primitive = VKD3D_PT_TRIANGLELIST; - } - sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); -+ sm6->p.program->output_topology = output_primitive; - - i = operands[4]; - if (!i || i > MAX_GS_INSTANCE_COUNT) -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 914c365b8f6..7a226c1c870 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -1730,7 +1730,13 @@ static uint32_t write_fx_2_object_initializer(const struct hlsl_ir_var *var, str - put_u32(buffer, id); - put_u32(buffer, size); - if (size) -+ { -+ static const uint32_t pad; -+ - bytecode_put_bytes(buffer, data, size); -+ if (size % 4) -+ bytecode_put_bytes_unaligned(buffer, &pad, 4 - (size % 4)); -+ } - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 801de6a5954..828a94d77ab 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -64,7 +64,6 @@ struct vkd3d_glsl_generator - - const struct vkd3d_shader_interface_info *interface_info; - const struct vkd3d_shader_descriptor_offset_info *offset_info; -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; - const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - }; - -@@ -130,7 +129,7 @@ static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info( - static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(struct vkd3d_glsl_generator *gen, - enum vkd3d_shader_descriptor_type type, unsigned int idx, unsigned int space) - { -- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; - - for (unsigned int i = 0; i < info->descriptor_count; ++i) - { -@@ -146,7 +145,7 @@ static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(st - static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor_by_id( - struct vkd3d_glsl_generator *gen, enum vkd3d_shader_descriptor_type type, unsigned int id) - { -- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; - - for (unsigned int i = 0; i < info->descriptor_count; ++i) - { -@@ -2080,7 +2079,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator - static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_generator *gen) - { - const struct vkd3d_shader_scan_combined_resource_sampler_info *sampler_info = gen->combined_sampler_info; -- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; - const struct vkd3d_shader_descriptor_info1 *descriptor; - unsigned int i; - -@@ -2431,7 +2430,6 @@ static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const stru - - static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, - struct vkd3d_shader_message_context *message_context) - { -@@ -2455,12 +2453,10 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - - gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); - gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); -- gen->descriptor_info = descriptor_info; - gen->combined_sampler_info = combined_sampler_info; - } - - int glsl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -@@ -2474,7 +2470,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); - - vkd3d_glsl_generator_init(&generator, program, compile_info, -- descriptor_info, combined_sampler_info, message_context); -+ combined_sampler_info, message_context); - ret = vkd3d_glsl_generator_generate(&generator, out); - vkd3d_glsl_generator_cleanup(&generator); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 98d3d17e826..1d78c5622de 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -1155,6 +1155,7 @@ struct hlsl_ctx - struct hlsl_constant_register - { - uint32_t index; -+ uint32_t allocated_mask; - struct hlsl_vec4 value; - struct vkd3d_shader_location loc; - } *regs; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index b5652475b43..8fcf6e6ac54 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -271,9 +271,9 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - if (ctx->profile->major_version < 4) - return true; - -- if (hlsl_type_is_patch_array(type1)) -+ if (hlsl_type_is_primitive_array(type1)) - { -- return hlsl_type_is_patch_array(type2) -+ return hlsl_type_is_primitive_array(type2) - && type1->e.array.array_type == type2->e.array.array_type - && type1->e.array.elements_count == type2->e.array.elements_count - && types_are_semantic_equivalent(ctx, type1->e.array.type, type2->e.array.type); -@@ -295,8 +295,8 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - const char *prefix; - char *new_name; - -- if (hlsl_type_is_patch_array(type)) -- prefix = type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT ? "inputpatch" : "outputpatch"; -+ if (hlsl_type_is_primitive_array(type)) -+ prefix = type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT ? "outputpatch" : "inputprim"; - else - prefix = output ? "output" : "input"; - -@@ -307,9 +307,9 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - { - if (!ascii_strcasecmp(ext_var->name, new_name)) - { -- VKD3D_ASSERT(hlsl_type_is_patch_array(ext_var->data_type) -+ VKD3D_ASSERT(hlsl_type_is_primitive_array(ext_var->data_type) - || ext_var->data_type->class <= HLSL_CLASS_VECTOR); -- VKD3D_ASSERT(hlsl_type_is_patch_array(type) || type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(hlsl_type_is_primitive_array(type) || type->class <= HLSL_CLASS_VECTOR); - - if (output) - { -@@ -383,7 +383,7 @@ static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t fie - } - - static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -- struct hlsl_block *block, struct hlsl_ir_var *top_var, uint32_t patch_index, struct hlsl_ir_load *lhs, -+ struct hlsl_block *block, uint32_t prim_index, struct hlsl_ir_load *lhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; -@@ -417,25 +417,25 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec - struct hlsl_ir_var *input; - struct hlsl_ir_load *load; - -- if (hlsl_type_is_patch_array(top_var->data_type)) -+ if (hlsl_type_is_primitive_array(var->data_type)) - { -- struct hlsl_type *top_type = top_var->data_type; -- struct hlsl_type *patch_type; -- struct hlsl_deref patch_deref; -+ struct hlsl_type *prim_type_src; -+ struct hlsl_deref prim_deref; - struct hlsl_ir_node *idx; - -- if (!(patch_type = hlsl_new_array_type(ctx, vector_type_src, top_type->e.array.elements_count, -- top_type->e.array.array_type))) -+ if (!(prim_type_src = hlsl_new_array_type(ctx, vector_type_src, var->data_type->e.array.elements_count, -+ var->data_type->e.array.array_type))) - return; -+ prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK; - -- if (!(input = add_semantic_var(ctx, func, var, patch_type, -+ if (!(input = add_semantic_var(ctx, func, var, prim_type_src, - modifiers, semantic, semantic_index + i, false, force_align, loc))) - return; -- hlsl_init_simple_deref_from_var(&patch_deref, input); -+ hlsl_init_simple_deref_from_var(&prim_deref, input); - -- idx = hlsl_block_add_uint_constant(ctx, block, patch_index, &var->loc); -+ idx = hlsl_block_add_uint_constant(ctx, block, prim_index, &var->loc); - -- if (!(load = hlsl_new_load_index(ctx, &patch_deref, idx, loc))) -+ if (!(load = hlsl_new_load_index(ctx, &prim_deref, idx, loc))) - return; - hlsl_block_add_instr(block, &load->node); - } -@@ -468,7 +468,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec - } - - static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -- struct hlsl_block *block, struct hlsl_ir_var *top_var, uint32_t patch_index, struct hlsl_ir_load *lhs, -+ struct hlsl_block *block, uint32_t prim_index, struct hlsl_ir_load *lhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct vkd3d_shader_location *loc = &lhs->node.loc; -@@ -494,8 +494,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - element_modifiers = modifiers; - force_align = true; - -- if (hlsl_type_is_patch_array(type)) -- patch_index = i; -+ if (hlsl_type_is_primitive_array(type)) -+ prim_index = i; - } - else - { -@@ -520,13 +520,13 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - return; - hlsl_block_add_instr(block, &element_load->node); - -- prepend_input_copy_recurse(ctx, func, block, top_var, patch_index, element_load, -+ prepend_input_copy_recurse(ctx, func, block, prim_index, element_load, - element_modifiers, semantic, elem_semantic_index, force_align); - } - } - else - { -- prepend_input_copy(ctx, func, block, var, patch_index, lhs, modifiers, semantic, semantic_index, force_align); -+ prepend_input_copy(ctx, func, block, prim_index, lhs, modifiers, semantic, semantic_index, force_align); - } - } - -@@ -544,8 +544,8 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function - return; - hlsl_block_add_instr(&block, &load->node); - -- prepend_input_copy_recurse(ctx, func, &block, var, 0, load, -- var->storage_modifiers, &var->semantic, var->semantic.index, false); -+ prepend_input_copy_recurse(ctx, func, &block, 0, load, var->storage_modifiers, -+ &var->semantic, var->semantic.index, false); - - list_move_head(&func->body.instrs, &block.instrs); - } -@@ -3587,6 +3587,45 @@ static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return true; - } - -+/* Lower modulus using: -+ * -+ * mod(x, y) = x - trunc(x / y) * y; -+ * -+ */ -+static bool lower_int_modulus_sm1(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *div, *trunc, *mul, *neg, *operands[2], *ret; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ bool is_float; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP2_MOD) -+ return false; -+ -+ is_float = instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT -+ || instr->data_type->e.numeric.type == HLSL_TYPE_HALF; -+ if (is_float) -+ return false; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); -+ -+ for (unsigned int i = 0; i < 2; ++i) -+ { -+ operands[i] = hlsl_block_add_cast(ctx, block, expr->operands[i].node, float_type, &instr->loc); -+ } -+ -+ div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, operands[0], operands[1]); -+ trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, div, &instr->loc); -+ mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, trunc, operands[1]); -+ neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul, &instr->loc); -+ ret = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, operands[0], neg); -+ hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc); -+ -+ return true; -+} -+ - /* Lower DIV to RCP + MUL. */ - static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -@@ -4222,7 +4261,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc - return hlsl_block_add_expr(ctx, instrs, HLSL_OP3_TERNARY, operands, if_true->data_type, &condition->loc); - } - --static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+static bool lower_int_division_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; -@@ -4262,7 +4301,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - --static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+static bool lower_int_modulus_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; -@@ -5352,6 +5391,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - } - } - -+static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int count, struct hlsl_reg *ret) -+{ -+ struct hlsl_constant_defs *defs = &ctx->constant_defs; -+ -+ for (size_t i = 0; i < defs->count; ++i) -+ { -+ const struct hlsl_constant_register *reg = &defs->regs[i]; -+ -+ for (size_t j = 0; j <= 4 - count; ++j) -+ { -+ unsigned int writemask = ((1u << count) - 1) << j; -+ -+ if ((reg->allocated_mask & writemask) == writemask -+ && !memcmp(f, ®->value.f[j], count * sizeof(float))) -+ { -+ ret->id = reg->index; -+ ret->allocation_size = 1; -+ ret->writemask = writemask; -+ ret->allocated = true; -+ return true; -+ } -+ } -+ } -+ -+ return false; -+} -+ - static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f, - const struct vkd3d_shader_location *loc) - { -@@ -5365,6 +5431,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, - if (reg->index == (component_index / 4)) - { - reg->value.f[component_index % 4] = f; -+ reg->allocated_mask |= (1u << (component_index % 4)); - return; - } - } -@@ -5375,6 +5442,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, - memset(reg, 0, sizeof(*reg)); - reg->index = component_index / 4; - reg->value.f[component_index % 4] = f; -+ reg->allocated_mask = (1u << (component_index % 4)); - reg->loc = *loc; - } - -@@ -5391,50 +5459,57 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - const struct hlsl_type *type = instr->data_type; -- unsigned int x, i; -- -- constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); -- TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); -+ float f[4] = {0}; - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); - VKD3D_ASSERT(type->e.numeric.dimy == 1); -- VKD3D_ASSERT(constant->reg.writemask); - -- for (x = 0, i = 0; x < 4; ++x) -+ for (unsigned int i = 0; i < type->e.numeric.dimx; ++i) - { - const union hlsl_constant_value_component *value; -- float f = 0; - -- if (!(constant->reg.writemask & (1u << x))) -- continue; -- value = &constant->value.u[i++]; -+ value = &constant->value.u[i]; - - switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: -- f = !!value->u; -+ f[i] = !!value->u; - break; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- f = value->f; -+ f[i] = value->f; - break; - - case HLSL_TYPE_INT: -- f = value->i; -+ f[i] = value->i; - break; - - case HLSL_TYPE_MIN16UINT: - case HLSL_TYPE_UINT: -- f = value->u; -+ f[i] = value->u; - break; - - case HLSL_TYPE_DOUBLE: - FIXME("Double constant.\n"); - return; - } -+ } -+ -+ if (find_constant(ctx, f, type->e.numeric.dimx, &constant->reg)) -+ { -+ TRACE("Reusing already allocated constant %s for @%u.\n", -+ debug_register('c', constant->reg, type), instr->index); -+ break; -+ } -+ -+ constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); -+ TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - -- record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); -+ for (unsigned int x = 0, i = 0; x < 4; ++x) -+ { -+ if ((constant->reg.writemask & (1u << x))) -+ record_constant(ctx, constant->reg.id * 4 + x, f[i++], &constant->node.loc); - } - - break; -@@ -5679,7 +5754,7 @@ static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hl - {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID}, - }; - -- if (hlsl_type_is_patch_array(type)) -+ if (hlsl_type_is_primitive_array(type)) - type = type->e.array.type; - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); -@@ -5710,7 +5785,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - [VKD3D_SHADER_TYPE_COMPUTE] = "Compute", - }; - -- bool is_patch = hlsl_type_is_patch_array(var->data_type); -+ bool is_primitive = hlsl_type_is_primitive_array(var->data_type); - enum vkd3d_shader_register_type type; - struct vkd3d_shader_version version; - bool special_interpolation = false; -@@ -5751,7 +5826,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - bool has_idx; - - if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ctx->domain, -- var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch)) -+ var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -5784,7 +5859,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - } - else - { -- unsigned int component_count = is_patch -+ unsigned int component_count = is_primitive - ? var->data_type->e.array.type->e.numeric.dimx : var->data_type->e.numeric.dimx; - int mode = (ctx->profile->major_version < 4) - ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -@@ -5803,7 +5878,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- struct register_allocator in_patch_allocator = {0}, patch_constant_out_patch_allocator = {0}; -+ struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0}; - struct register_allocator input_allocator = {0}, output_allocator = {0}; - bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; - bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; -@@ -5816,7 +5891,7 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun - { - if (var->is_input_semantic) - { -- if (hlsl_type_is_patch_array(var->data_type)) -+ if (hlsl_type_is_primitive_array(var->data_type)) - { - bool is_patch_constant_output_patch = ctx->is_patch_constant_func && - var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT; -@@ -5825,7 +5900,7 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun - allocate_semantic_register(ctx, var, &patch_constant_out_patch_allocator, false, - !is_vertex_shader); - else -- allocate_semantic_register(ctx, var, &in_patch_allocator, false, -+ allocate_semantic_register(ctx, var, &in_prim_allocator, false, - !is_vertex_shader); - } - else -@@ -6351,7 +6426,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - - *offset = deref->const_offset; - -- if (hlsl_type_is_patch_array(deref->var->data_type)) -+ if (hlsl_type_is_primitive_array(deref->var->data_type)) - return false; - - if (offset_node) -@@ -6397,7 +6472,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - VKD3D_ASSERT(deref->data_type); - VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); - -- if (!hlsl_type_is_patch_array(deref->var->data_type)) -+ if (!hlsl_type_is_primitive_array(deref->var->data_type)) - offset = hlsl_offset_from_deref_safe(ctx, deref); - - ret.index += offset / 4; -@@ -7031,8 +7106,11 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - - lower_ir(ctx, lower_narrowing_casts, body); - lower_ir(ctx, lower_int_dot, body); -- lower_ir(ctx, lower_int_division, body); -- lower_ir(ctx, lower_int_modulus, body); -+ if (hlsl_version_ge(ctx, 4, 0)) -+ { -+ lower_ir(ctx, lower_int_modulus_sm4, body); -+ lower_ir(ctx, lower_int_division_sm4, body); -+ } - lower_ir(ctx, lower_int_abs, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_float_modulus, body); -@@ -7044,8 +7122,8 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - struct shader_signature *signature, bool output, struct hlsl_ir_var *var) - { - enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID; -+ bool is_primitive = hlsl_type_is_primitive_array(var->data_type); - enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -- bool is_patch = hlsl_type_is_patch_array(var->data_type); - unsigned int register_index, mask, use_mask; - const char *name = var->semantic.name; - enum vkd3d_shader_register_type type; -@@ -7058,7 +7136,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - bool has_idx, ret; - - ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, -- ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch); -+ ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive); - VKD3D_ASSERT(ret); - if (sysval == ~0u) - return; -@@ -7419,7 +7497,7 @@ static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, - static enum vkd3d_shader_register_type sm4_get_semantic_register_type(enum vkd3d_shader_type shader_type, - bool is_patch_constant_func, const struct hlsl_ir_var *var) - { -- if (hlsl_type_is_patch_array(var->data_type)) -+ if (hlsl_type_is_primitive_array(var->data_type)) - { - VKD3D_ASSERT(var->is_input_semantic); - -@@ -7670,14 +7748,14 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - } - else if (var->is_input_semantic) - { -- bool is_patch = hlsl_type_is_patch_array(var->data_type); -+ bool is_primitive = hlsl_type_is_primitive_array(var->data_type); - bool has_idx; - - if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -- VKD3D_ASSERT(!is_patch); -+ VKD3D_ASSERT(!is_primitive); - - if (has_idx) - { -@@ -7699,12 +7777,12 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - - reg->type = sm4_get_semantic_register_type(version->type, ctx->is_patch_constant_func, var); - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[is_patch ? 1 : 0].offset = hlsl_reg.id; -- reg->idx_count = is_patch ? 2 : 1; -+ reg->idx[is_primitive ? 1 : 0].offset = hlsl_reg.id; -+ reg->idx_count = is_primitive ? 2 : 1; - *writemask = hlsl_reg.writemask; - } - -- if (is_patch) -+ if (is_primitive) - { - reg->idx[0].offset = deref->const_offset / 4; - if (deref->rel_offset.node) -@@ -9032,7 +9110,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - const struct hlsl_ir_var *var, struct hlsl_block *block, const struct vkd3d_shader_location *loc) - { - const struct vkd3d_shader_version *version = &program->shader_version; -- const bool is_patch = hlsl_type_is_patch_array(var->data_type); -+ const bool is_primitive = hlsl_type_is_primitive_array(var->data_type); - const bool output = var->is_output_semantic; - enum vkd3d_shader_sysval_semantic semantic; - struct vkd3d_shader_dst_param *dst_param; -@@ -9044,7 +9122,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - bool has_idx; - - sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ctx->domain, -- var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_patch); -+ var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive); - if (semantic == ~0u) - semantic = VKD3D_SHADER_SV_NONE; - -@@ -9057,9 +9135,17 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; - break; - -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ opcode = VKD3DSIH_DCL_INPUT_PS_SGV; -+ else if (version->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ opcode = VKD3DSIH_DCL_INPUT; -+ else -+ opcode = VKD3DSIH_DCL_INPUT_SGV; -+ break; -+ - case VKD3D_SHADER_SV_INSTANCE_ID: - case VKD3D_SHADER_SV_IS_FRONT_FACE: -- case VKD3D_SHADER_SV_PRIMITIVE_ID: - case VKD3D_SHADER_SV_SAMPLE_INDEX: - case VKD3D_SHADER_SV_VERTEX_ID: - opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -9069,7 +9155,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - default: - if (version->type == VKD3D_SHADER_TYPE_PIXEL) - opcode = VKD3DSIH_DCL_INPUT_PS_SIV; -- else if (is_patch) -+ else if (is_primitive && version->type != VKD3D_SHADER_TYPE_GEOMETRY) - opcode = VKD3DSIH_DCL_INPUT; - else - opcode = VKD3DSIH_DCL_INPUT_SIV; -@@ -9110,7 +9196,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - } - else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) - { -- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || is_patch); -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || is_primitive || version->type == VKD3D_SHADER_TYPE_GEOMETRY); - dst_param = &ins->declaration.dst; - } - else -@@ -9121,7 +9207,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - dst_param = &ins->declaration.register_semantic.reg; - } - -- if (is_patch) -+ if (is_primitive) - { - VKD3D_ASSERT(has_idx); - vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 2); -@@ -10954,7 +11040,7 @@ static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, - { - const struct hlsl_type *type = var->data_type; - -- if (hlsl_type_is_patch_array(type)) -+ if (hlsl_type_is_primitive_array(type)) - type = var->data_type->e.array.type; - - /* Note that it doesn't matter if the semantic is unused or doesn't -@@ -11257,6 +11343,13 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - ? 0 : ctx->input_control_point_count; - program->tess_domain = ctx->domain; - } -+ else if (version.type == VKD3D_SHADER_TYPE_GEOMETRY) -+ { -+ program->input_control_point_count = ctx->input_control_point_count; -+ program->input_primitive = ctx->input_primitive_type; -+ program->output_topology = VKD3D_PT_UNDEFINED; /* TODO: obtain from stream output parameters. */ -+ program->vertices_out_count = ctx->max_vertex_count; -+ } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { -@@ -12528,12 +12621,6 @@ static void process_entry_function(struct hlsl_ctx *ctx, - } - - validate_and_record_prim_type(ctx, var); -- if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -- { -- hlsl_fixme(ctx, &var->loc, "Input primitive parameters in geometry shaders."); -- continue; -- } -- - prepend_input_var_copy(ctx, entry_func, var); - } - else if (hlsl_get_stream_output_type(var->data_type)) -@@ -12565,7 +12652,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, - - if (var->storage_modifiers & HLSL_STORAGE_IN) - { -- if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && !var->semantic.name) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE, - "Input parameter \"%s\" is missing a primitive type.", var->name); -@@ -12659,6 +12746,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, - while (lower_ir(ctx, lower_nonconstant_array_loads, body)); - - lower_ir(ctx, lower_ternary, body); -+ lower_ir(ctx, lower_int_modulus_sm1, body); - lower_ir(ctx, lower_division, body); - /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */ - hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 2e3040f038e..3a784c71388 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -116,6 +116,7 @@ void vsir_program_cleanup(struct vsir_program *program) - shader_signature_cleanup(&program->input_signature); - shader_signature_cleanup(&program->output_signature); - shader_signature_cleanup(&program->patch_constant_signature); -+ vkd3d_shader_free_scan_descriptor_info1(&program->descriptors); - } - - const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index e783128e236..756b43298d3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -44,7 +44,6 @@ struct msl_generator - bool write_depth; - - const struct vkd3d_shader_interface_info *interface_info; -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; - }; - - static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, -@@ -821,7 +820,7 @@ static void msl_generate_cbv_declaration(struct msl_generator *gen, - - static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) - { -- const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *info = &gen->program->descriptors; - const struct vkd3d_shader_descriptor_info1 *descriptor; - struct vkd3d_string_buffer *buffer = gen->buffer; - unsigned int i; -@@ -1171,7 +1170,7 @@ static void msl_generate_entrypoint(struct msl_generator *gen) - - vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); - -- if (gen->descriptor_info->descriptor_count) -+ if (gen->program->descriptors.descriptor_count) - { - msl_print_indent(gen->buffer, 2); - /* TODO: Configurable argument buffer binding location. */ -@@ -1195,7 +1194,7 @@ static void msl_generate_entrypoint(struct msl_generator *gen) - vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); - if (gen->write_depth) - vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); -- if (gen->descriptor_info->descriptor_count) -+ if (gen->program->descriptors.descriptor_count) - vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); - vkd3d_string_buffer_printf(gen->buffer, ");\n"); - -@@ -1234,7 +1233,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader - gen->prefix); - if (gen->write_depth) - vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); -- if (gen->descriptor_info->descriptor_count) -+ if (gen->program->descriptors.descriptor_count) - vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); - vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); - -@@ -1276,7 +1275,6 @@ static void msl_generator_cleanup(struct msl_generator *gen) - - static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - struct vkd3d_shader_message_context *message_context) - { - enum vkd3d_shader_type type = program->shader_version.type; -@@ -1297,13 +1295,11 @@ static int msl_generator_init(struct msl_generator *gen, struct vsir_program *pr - return VKD3D_ERROR_INVALID_SHADER; - } - gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); -- gen->descriptor_info = descriptor_info; - - return VKD3D_OK; - } - - int msl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context) - { -@@ -1315,7 +1311,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - - VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); - -- if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) -+ if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0) - return ret; - ret = msl_generator_generate(&generator, out); - msl_generator_cleanup(&generator); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 45140d44595..0cc1ceca798 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -3091,9 +3091,6 @@ struct spirv_compiler - } *spirv_parameter_info; - - bool prolog_emitted; -- struct shader_signature input_signature; -- struct shader_signature output_signature; -- struct shader_signature patch_constant_signature; - const struct vkd3d_shader_transform_feedback_info *xfb_info; - struct vkd3d_shader_output_info - { -@@ -3108,7 +3105,6 @@ struct spirv_compiler - - uint32_t binding_idx; - -- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - unsigned int input_control_point_count; - unsigned int output_control_point_count; - -@@ -3186,10 +3182,6 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - - vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); - -- shader_signature_cleanup(&compiler->input_signature); -- shader_signature_cleanup(&compiler->output_signature); -- shader_signature_cleanup(&compiler->patch_constant_signature); -- - vkd3d_free(compiler->ssa_register_info); - vkd3d_free(compiler->block_label_ids); - -@@ -3198,7 +3190,6 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - - static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, uint64_t config_flags) - { - const struct vkd3d_shader_interface_info *shader_interface; -@@ -3214,6 +3205,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - compiler->message_context = message_context; - compiler->location.source_name = compile_info->source_name; - compiler->config_flags = config_flags; -+ compiler->program = program; - - if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) - { -@@ -3340,8 +3332,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) - compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count; - -- compiler->scan_descriptor_info = scan_descriptor_info; -- - compiler->phase = VKD3DSIH_INVALID; - - vkd3d_string_buffer_cache_init(&compiler->string_buffers); -@@ -5806,7 +5796,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - unsigned int array_sizes[2]; - - shader_signature = reg_type == VKD3DSPR_PATCHCONST -- ? &compiler->patch_constant_signature : &compiler->input_signature; -+ ? &compiler->program->patch_constant_signature : &compiler->program->input_signature; - - signature_element = &shader_signature->elements[element_idx]; - sysval = signature_element->sysval_semantic; -@@ -5884,7 +5874,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - if (reg_type == VKD3DSPR_PATCHCONST) - { - vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); -- location += shader_signature_next_location(&compiler->input_signature); -+ location += shader_signature_next_location(&compiler->program->input_signature); - } - vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); - if (component_idx) -@@ -6018,7 +6008,7 @@ static void calculate_clip_or_cull_distance_mask(const struct signature_element - /* Emits arrayed SPIR-V built-in variables. */ - static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) - { -- const struct shader_signature *output_signature = &compiler->output_signature; -+ const struct shader_signature *output_signature = &compiler->program->output_signature; - uint32_t clip_distance_mask = 0, clip_distance_id = 0; - uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; -@@ -6128,7 +6118,8 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - - is_patch_constant = (reg_type == VKD3DSPR_PATCHCONST); - -- shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; -+ shader_signature = is_patch_constant ? &compiler->program->patch_constant_signature -+ : &compiler->program->output_signature; - - signature_element = &shader_signature->elements[element_idx]; - sysval = signature_element->sysval_semantic; -@@ -6202,7 +6193,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - unsigned int location = signature_element->target_location; - - if (is_patch_constant) -- location += shader_signature_next_location(&compiler->output_signature); -+ location += shader_signature_next_location(&compiler->program->output_signature); - else if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL - && signature_element->sysval_semantic == VKD3D_SHADER_SV_TARGET) - location = signature_element->semantic_index; -@@ -6392,7 +6383,8 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * - - is_patch_constant = is_in_fork_or_join_phase(compiler); - -- signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; -+ signature = is_patch_constant ? &compiler->program->patch_constant_signature -+ : &compiler->program->output_signature; - - function_id = compiler->epilogue_function_id; - -@@ -6736,7 +6728,7 @@ static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor - struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, - const struct vkd3d_shader_register_range *range) - { -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = &compiler->program->descriptors; - unsigned int register_last = (range->last == ~0u) ? range->first : range->last; - const struct vkd3d_shader_descriptor_info1 *d; - unsigned int i; -@@ -11114,20 +11106,20 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - { - struct vkd3d_shader_dst_param dst; - -- for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) -+ for (unsigned int i = 0; i < compiler->program->input_signature.element_count; ++i) - spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i); - -- for (unsigned int i = 0; i < compiler->output_signature.element_count; ++i) -+ for (unsigned int i = 0; i < compiler->program->output_signature.element_count; ++i) - { - /* PS outputs other than TARGET have dedicated registers and therefore - * go through spirv_compiler_emit_dcl_output() for now. */ - if (compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL -- && compiler->output_signature.elements[i].sysval_semantic != VKD3D_SHADER_SV_TARGET) -+ && compiler->program->output_signature.elements[i].sysval_semantic != VKD3D_SHADER_SV_TARGET) - continue; - spirv_compiler_emit_output(compiler, VKD3DSPR_OUTPUT, i); - } - -- for (unsigned int i = 0; i < compiler->patch_constant_signature.element_count; ++i) -+ for (unsigned int i = 0; i < compiler->program->patch_constant_signature.element_count; ++i) - { - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_output(compiler, VKD3DSPR_PATCHCONST, i); -@@ -11163,11 +11155,12 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - - static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) - { -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptors = &compiler->program->descriptors; - unsigned int i; - -- for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) -+ for (i = 0; i < descriptors->descriptor_count; ++i) - { -- const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; -+ const struct vkd3d_shader_descriptor_info1 *descriptor = &descriptors->descriptors[i]; - struct vkd3d_shader_register_range range; - - range.first = descriptor->register_index; -@@ -11198,23 +11191,18 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c - } - } - --static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, -+static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) - { - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vsir_program *program = compiler->program; - struct vkd3d_shader_instruction_array instructions; - enum vkd3d_shader_spirv_environment environment; - enum vkd3d_result result = VKD3D_OK; - unsigned int i, max_element_count; - -- if ((result = vsir_program_transform(program, compiler->config_flags, -- compile_info, compiler->message_context)) < 0) -- return result; -- -- VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); -- - max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); - if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) - return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -11261,17 +11249,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- compiler->program = program; -- - instructions = program->instructions; -- memset(&program->instructions, 0, sizeof(program->instructions)); -- -- compiler->input_signature = program->input_signature; -- compiler->output_signature = program->output_signature; -- compiler->patch_constant_signature = program->patch_constant_signature; -- memset(&program->input_signature, 0, sizeof(program->input_signature)); -- memset(&program->output_signature, 0, sizeof(program->output_signature)); -- memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); -+ - compiler->use_vocp = program->use_vocp; - compiler->block_names = program->block_names; - compiler->block_name_count = program->block_name_count; -@@ -11291,8 +11270,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); - } - -- shader_instruction_array_destroy(&instructions); -- - if (result < 0) - return result; - -@@ -11374,21 +11351,25 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - } - - int spirv_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { - struct spirv_compiler *spirv_compiler; - int ret; - -+ if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) -+ return ret; -+ -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); -+ - if (!(spirv_compiler = spirv_compiler_create(program, compile_info, -- scan_descriptor_info, message_context, config_flags))) -+ message_context, config_flags))) - { - ERR("Failed to create SPIR-V compiler.\n"); - return VKD3D_ERROR; - } - -- ret = spirv_compiler_generate_spirv(spirv_compiler, program, compile_info, out); -+ ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, out); - - spirv_compiler_destroy(spirv_compiler); - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index f4525009f77..23dab35a288 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -714,6 +714,22 @@ input_primitive_type_table[] = - [VKD3D_SM4_INPUT_PT_TRIANGLEADJ] = {6, VKD3D_PT_TRIANGLELIST_ADJ}, - }; - -+static const enum vkd3d_sm4_input_primitive_type sm4_input_primitive_type_table[] = -+{ -+ [VKD3D_PT_POINTLIST] = VKD3D_SM4_INPUT_PT_POINT, -+ [VKD3D_PT_LINELIST] = VKD3D_SM4_INPUT_PT_LINE, -+ [VKD3D_PT_TRIANGLELIST] = VKD3D_SM4_INPUT_PT_TRIANGLE, -+ [VKD3D_PT_LINELIST_ADJ] = VKD3D_SM4_INPUT_PT_LINEADJ, -+ [VKD3D_PT_TRIANGLELIST_ADJ] = VKD3D_SM4_INPUT_PT_TRIANGLEADJ, -+}; -+ -+static const enum vkd3d_sm4_output_primitive_type sm4_output_primitive_type_table[] = -+{ -+ [VKD3D_PT_POINTLIST] = VKD3D_SM4_OUTPUT_PT_POINTLIST, -+ [VKD3D_PT_LINESTRIP] = VKD3D_SM4_OUTPUT_PT_LINESTRIP, -+ [VKD3D_PT_TRIANGLESTRIP] = VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP, -+}; -+ - static const enum vkd3d_shader_resource_type resource_type_table[] = - { - /* 0 */ VKD3D_SHADER_RESOURCE_NONE, -@@ -1077,6 +1093,8 @@ static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled output primitive type %#x.\n", primitive_type); -+ -+ priv->p.program->output_topology = ins->declaration.primitive_type.type; - } - - static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1104,6 +1122,8 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled input primitive type %#x.\n", primitive_type); -+ -+ program->input_primitive = ins->declaration.primitive_type.type; - } - - static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1114,6 +1134,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i - ins->declaration.count = *tokens; - if (opcode == VKD3D_SM4_OP_DCL_TEMPS) - program->temp_count = max(program->temp_count, *tokens); -+ else if (opcode == VKD3D_SM4_OP_DCL_VERTICES_OUT) -+ program->vertices_out_count = *tokens; - } - - static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1721,7 +1743,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX,VKD3D_SM4_SWIZZLE_VEC4}, -- {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID, VKD3D_SM4_SWIZZLE_VEC4}, -+ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID, VKD3D_SM4_SWIZZLE_SCALAR}, - {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, -@@ -2991,6 +3013,7 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, - - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, -+ {"sv_gsinstanceid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_GSINSTID, false}, - - {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, -@@ -3071,7 +3094,8 @@ static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semant - - bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, - const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, -- const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_patch) -+ const char *semantic_name, unsigned int semantic_idx, bool output, -+ bool is_patch_constant_func, bool is_primitive) - { - unsigned int i; - -@@ -3095,9 +3119,8 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - - {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, - -- {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, -- {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, -+ {"sv_gsinstanceid", false, VKD3D_SHADER_TYPE_GEOMETRY, ~0u}, - - {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, -@@ -3134,7 +3157,7 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - }; - bool has_sv_prefix = !ascii_strncasecmp(semantic_name, "sv_", 3); - -- if (is_patch) -+ if (is_primitive) - { - VKD3D_ASSERT(!output); - -@@ -3198,6 +3221,8 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - - if (has_sv_prefix) - return false; -+ if (!output && version->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ return false; - - *sysval_semantic = VKD3D_SHADER_SV_NONE; - return true; -@@ -3930,6 +3955,57 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler - write_sm4_instruction(tpf, &instr); - } - -+static void tpf_write_dcl_input_primitive(const struct tpf_compiler *tpf, enum vkd3d_primitive_type input_primitive, -+ unsigned int patch_vertex_count) -+{ -+ enum vkd3d_sm4_input_primitive_type sm4_input_primitive; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, -+ }; -+ -+ if (input_primitive == VKD3D_PT_PATCH) -+ { -+ VKD3D_ASSERT(patch_vertex_count >= 1 && patch_vertex_count <= 32); -+ sm4_input_primitive = VKD3D_SM5_INPUT_PT_PATCH1 + patch_vertex_count - 1; -+ } -+ else -+ { -+ VKD3D_ASSERT(input_primitive < ARRAY_SIZE(sm4_input_primitive_type_table)); -+ sm4_input_primitive = sm4_input_primitive_type_table[input_primitive]; -+ } -+ -+ instr.extra_bits = sm4_input_primitive << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_output_topology(const struct tpf_compiler *tpf, enum vkd3d_primitive_type output_topology) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, -+ }; -+ -+ VKD3D_ASSERT(output_topology < ARRAY_SIZE(sm4_output_primitive_type_table)); -+ instr.extra_bits = sm4_output_primitive_type_table[output_topology] << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_vertices_out(const struct tpf_compiler *tpf, unsigned int count) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_VERTICES_OUT, -+ -+ .idx = {count}, -+ .idx_count = 1, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ - static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) - { - struct sm4_instruction_modifier *modifier; -@@ -4233,6 +4309,13 @@ static void tpf_write_shdr(struct tpf_compiler *tpf) - tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); - tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); - } -+ else if (version->type == VKD3D_SHADER_TYPE_GEOMETRY) -+ { -+ tpf_write_dcl_input_primitive(tpf, program->input_primitive, program->input_control_point_count); -+ if (program->output_topology != VKD3D_PT_UNDEFINED) -+ tpf_write_dcl_output_topology(tpf, program->output_topology); -+ tpf_write_dcl_vertices_out(tpf, program->vertices_out_count); -+ } - - tpf_write_program(tpf, program); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 2a66cbdb1be..2afeff086e5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1569,7 +1569,7 @@ static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descri - return VKD3D_OK; - } - --static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) -+void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) - { - TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); - -@@ -1577,12 +1577,10 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des - } - - static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, -- struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) -+ struct vkd3d_shader_message_context *message_context, bool add_descriptor_info) - { - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; -- struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; - struct vkd3d_shader_scan_descriptor_info *descriptor_info; - struct vkd3d_shader_scan_signature_info *signature_info; - struct vkd3d_shader_instruction *instruction; -@@ -1591,29 +1589,22 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - unsigned int i; - - descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); -- if (descriptor_info1) -- { -- descriptor_info1->descriptors = NULL; -- descriptor_info1->descriptor_count = 0; -- } -- else if (descriptor_info) -- { -- descriptor_info1 = &local_descriptor_info1; -- } -+ if (descriptor_info) -+ add_descriptor_info = true; -+ - signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); - - if ((combined_sampler_info = vkd3d_find_struct(compile_info->next, SCAN_COMBINED_RESOURCE_SAMPLER_INFO))) - { - combined_sampler_info->combined_samplers = NULL; - combined_sampler_info->combined_sampler_count = 0; -- if (!descriptor_info1) -- descriptor_info1 = &local_descriptor_info1; -+ add_descriptor_info = true; - } - - tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); - - vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, -- descriptor_info1, combined_sampler_info, message_context); -+ add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context); - - if (TRACE_ON()) - vsir_program_trace(program); -@@ -1653,7 +1644,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - } - - if (!ret && descriptor_info) -- ret = convert_descriptor_info(descriptor_info, descriptor_info1); -+ ret = convert_descriptor_info(descriptor_info, &program->descriptors); - - if (!ret && tessellation_info) - { -@@ -1667,15 +1658,10 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - vkd3d_shader_free_scan_combined_resource_sampler_info(combined_sampler_info); - if (descriptor_info) - vkd3d_shader_free_scan_descriptor_info(descriptor_info); -- if (descriptor_info1) -- vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); - if (signature_info) - vkd3d_shader_free_scan_signature_info(signature_info); - } -- else -- { -- vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); -- } -+ - vkd3d_shader_scan_context_cleanup(&context); - return ret; - } -@@ -1713,7 +1699,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - - if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) - { -- ret = vsir_program_scan(&program, compile_info, &message_context, NULL); -+ ret = vsir_program_scan(&program, compile_info, &message_context, false); - vsir_program_cleanup(&program); - } - } -@@ -1730,7 +1716,6 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_scan_combined_resource_sampler_info combined_sampler_info; -- struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_shader_compile_info scan_info; - int ret; - -@@ -1746,28 +1731,24 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - combined_sampler_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO; - combined_sampler_info.next = scan_info.next; - scan_info.next = &combined_sampler_info; -- if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) - return ret; -- ret = glsl_compile(program, config_flags, &scan_descriptor_info, -+ ret = glsl_compile(program, config_flags, - &combined_sampler_info, compile_info, out, message_context); - vkd3d_shader_free_scan_combined_resource_sampler_info(&combined_sampler_info); -- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - - case VKD3D_SHADER_TARGET_SPIRV_BINARY: - case VKD3D_SHADER_TARGET_SPIRV_TEXT: -- if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) - return ret; -- ret = spirv_compile(program, config_flags, &scan_descriptor_info, -- compile_info, out, message_context); -- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -+ ret = spirv_compile(program, config_flags, compile_info, out, message_context); - break; - - case VKD3D_SHADER_TARGET_MSL: -- if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, true)) < 0) - return ret; -- ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, out, message_context); -- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -+ ret = msl_compile(program, config_flags, compile_info, out, message_context); - break; - - default: -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 3a1b8d8bb64..e794257b9d8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1422,6 +1422,30 @@ enum vsir_normalisation_level - VSIR_NORMALISED_SM6, - }; - -+struct vkd3d_shader_descriptor_info1 -+{ -+ enum vkd3d_shader_descriptor_type type; -+ unsigned int register_space; -+ unsigned int register_index; -+ unsigned int register_id; -+ enum vkd3d_shader_resource_type resource_type; -+ enum vkd3d_shader_resource_data_type resource_data_type; -+ unsigned int flags; -+ unsigned int sample_count; -+ unsigned int buffer_size; -+ unsigned int structure_stride; -+ unsigned int count; -+ uint32_t uav_flags; -+}; -+ -+struct vkd3d_shader_scan_descriptor_info1 -+{ -+ struct vkd3d_shader_descriptor_info1 *descriptors; -+ unsigned int descriptor_count; -+}; -+ -+void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info); -+ - struct vsir_program - { - struct vkd3d_shader_version shader_version; -@@ -1431,6 +1455,8 @@ struct vsir_program - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; - -+ struct vkd3d_shader_scan_descriptor_info1 descriptors; -+ - unsigned int parameter_count; - const struct vkd3d_shader_parameter1 *parameters; - bool free_parameters; -@@ -1452,6 +1478,9 @@ struct vsir_program - enum vkd3d_tessellator_domain tess_domain; - enum vkd3d_shader_tessellator_partitioning tess_partitioning; - enum vkd3d_shader_tessellator_output_primitive tess_output_primitive; -+ enum vkd3d_primitive_type input_primitive, output_topology; -+ unsigned int vertices_out_count; -+ - uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; - - struct vsir_features features; -@@ -1508,28 +1537,6 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_pr - void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); - --struct vkd3d_shader_descriptor_info1 --{ -- enum vkd3d_shader_descriptor_type type; -- unsigned int register_space; -- unsigned int register_index; -- unsigned int register_id; -- enum vkd3d_shader_resource_type resource_type; -- enum vkd3d_shader_resource_data_type resource_data_type; -- unsigned int flags; -- unsigned int sample_count; -- unsigned int buffer_size; -- unsigned int structure_stride; -- unsigned int count; -- uint32_t uav_flags; --}; -- --struct vkd3d_shader_scan_descriptor_info1 --{ -- struct vkd3d_shader_descriptor_info1 *descriptors; -- unsigned int descriptor_count; --}; -- - void vsir_program_trace(const struct vsir_program *program); - - const char *shader_get_type_prefix(enum vkd3d_shader_type type); -@@ -1650,7 +1657,8 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); - bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, - const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, -- const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func, bool is_patch); -+ const char *semantic_name, unsigned int semantic_idx, bool output, -+ bool is_patch_constant_func, bool is_primitive); - - int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -@@ -1673,7 +1681,6 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - int glsl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -@@ -1681,12 +1688,10 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - #define SPIRV_MAX_SRC_COUNT 6 - - int spirv_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - int msl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); - --- -2.47.2 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch deleted file mode 100644 index 4dfedd70..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch +++ /dev/null @@ -1,1779 +0,0 @@ -From 86412218c44000e79015064e3efe32a3783dab6b Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 4 Apr 2025 07:59:53 +1100 -Subject: [PATCH] Updated vkd3d to f576ecc9929dd98c900bb8bc0335b91a1a0d3bff. - ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 32 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 4 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 3 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 38 + - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 + - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 738 ++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 187 ++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 1 + - .../libs/vkd3d-shader/vkd3d_shader_main.c | 12 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 4 + - 11 files changed, 911 insertions(+), 112 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 58e35cf22e8..b49ef9865db 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1759,27 +1759,40 @@ static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) - - static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) - { -+ uint32_t offset = reg->reg.idx_count ? reg->reg.idx[0].offset : 0; -+ - VKD3D_ASSERT(reg->write_mask); - put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER - | sm1_encode_register_type(®->reg) - | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) - | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) -- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); -+ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); - } - - static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) - { -+ uint32_t address_mode = VKD3D_SM1_ADDRESS_MODE_ABSOLUTE, offset = 0; -+ -+ if (reg->reg.idx_count) -+ { -+ offset = reg->reg.idx[0].offset; -+ if (reg->reg.idx[0].rel_addr) -+ address_mode = VKD3D_SM1_ADDRESS_MODE_RELATIVE; -+ } -+ - put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER - | sm1_encode_register_type(®->reg) -+ | (address_mode << VKD3D_SM1_ADDRESS_MODE_SHIFT) - | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) - | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) -- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); -+ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); - } - - static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) - { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -+ const struct vkd3d_shader_src_param *src; - const struct vkd3d_sm1_opcode_info *info; - unsigned int i; - uint32_t token; -@@ -1810,13 +1823,10 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v - - for (i = 0; i < ins->src_count; ++i) - { -- if (ins->src[i].reg.idx[0].rel_addr) -- { -- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, -- "Unhandled relative addressing on source register."); -- d3dbc->failed = true; -- } -- write_sm1_src_register(buffer, &ins->src[i]); -+ src = &ins->src[i]; -+ write_sm1_src_register(buffer, src); -+ if (src->reg.idx_count && src->reg.idx[0].rel_addr) -+ write_sm1_src_register(buffer, src->reg.idx[0].rel_addr); - } - }; - -@@ -1831,6 +1841,7 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 - .reg.type = VKD3DSPR_CONST, - .write_mask = VKD3DSP_WRITEMASK_ALL, - .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, -+ .reg.idx_count = 1, - }; - - token = VKD3D_SM1_OP_DEF; -@@ -1863,6 +1874,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, - reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; - reg.write_mask = VKD3DSP_WRITEMASK_ALL; - reg.reg.idx[0].offset = reg_id; -+ reg.reg.idx_count = 1; - - write_sm1_dst_register(buffer, ®); - } -@@ -1938,6 +1950,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str - case VKD3DSIH_MAX: - case VKD3DSIH_MIN: - case VKD3DSIH_MOV: -+ case VKD3DSIH_MOVA: - case VKD3DSIH_MUL: - case VKD3DSIH_SINCOS: - case VKD3DSIH_SLT: -@@ -1982,6 +1995,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, - uint32_t token, usage_idx; - bool ret; - -+ reg.reg.idx_count = 1; - if (sm1_register_from_semantic_name(version, element->semantic_name, - element->semantic_index, output, ®.reg.type, ®.reg.idx[0].offset)) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 7a226c1c870..debcb261811 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -2366,6 +2366,7 @@ static inline bool is_object_fx_type(enum state_property_component_type type) - case FX_BLEND: - case FX_VERTEXSHADER: - case FX_PIXELSHADER: -+ case FX_GEOMETRYSHADER: - return true; - default: - return false; -@@ -2761,7 +2762,8 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - struct hlsl_ir_constant *c = hlsl_ir_constant(node); - struct hlsl_type *data_type = c->node.data_type; - -- if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) -+ if (data_type->class == HLSL_CLASS_SCALAR -+ && (data_type->e.numeric.type == HLSL_TYPE_INT || data_type->e.numeric.type == HLSL_TYPE_UINT)) - { - if (c->value.u[0].u != 0) - hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 828a94d77ab..a87ade5e467 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -1296,7 +1296,7 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled SV_POSITION index %u.", idx); - if (version->type == VKD3D_SHADER_TYPE_PIXEL) -- vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); -+ vkd3d_string_buffer_printf(buffer, "vec4(gl_FragCoord.xyz, 1.0 / gl_FragCoord.w)"); - else - vkd3d_string_buffer_printf(buffer, "gl_Position"); - break; -@@ -2468,6 +2468,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - return ret; - - VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); -+ VKD3D_ASSERT(program->has_descriptor_info); - - vkd3d_glsl_generator_init(&generator, program, compile_info, - combined_sampler_info, message_context); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 01586592b25..d1d20b7384c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -1588,6 +1588,43 @@ void hlsl_block_add_simple_store(struct hlsl_ctx *ctx, struct hlsl_block *block, - hlsl_block_add_store_index(ctx, block, &lhs_deref, NULL, rhs, 0, &rhs->loc); - } - -+static struct hlsl_ir_node *hlsl_new_store_parent(struct hlsl_ctx *ctx, -+ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, -+ unsigned int writemask, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_store *store; -+ -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); -+ VKD3D_ASSERT(lhs->path_len >= path_len); -+ -+ if (!(store = hlsl_alloc(ctx, sizeof(*store)))) -+ return NULL; -+ init_node(&store->node, HLSL_IR_STORE, NULL, loc); -+ -+ if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, path_len)) -+ { -+ vkd3d_free(store); -+ return NULL; -+ } -+ for (unsigned int i = 0; i < path_len; ++i) -+ hlsl_src_from_node(&store->lhs.path[i], lhs->path[i].node); -+ -+ hlsl_src_from_node(&store->rhs, rhs); -+ -+ if (!writemask && type_is_single_reg(rhs->data_type)) -+ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; -+ store->writemask = writemask; -+ -+ return &store->node; -+} -+ -+void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, -+ unsigned int writemask, const struct vkd3d_shader_location *loc) -+{ -+ append_new_instr(ctx, block, hlsl_new_store_parent(ctx, lhs, path_len, rhs, writemask, loc)); -+} -+ - void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) - { -@@ -4957,6 +4994,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - - if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY - || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT -+ || target_type == VKD3D_SHADER_TARGET_GLSL - || target_type == VKD3D_SHADER_TARGET_D3D_ASM) - { - uint64_t config_flags = vkd3d_shader_init_config_flags(); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 1d78c5622de..fafa5740963 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -1565,6 +1565,9 @@ void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *blo - void hlsl_block_add_store_index(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, - unsigned int writemask, const struct vkd3d_shader_location *loc); -+void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, -+ unsigned int writemask, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, - unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 8fcf6e6ac54..ba56ba90403 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1916,12 +1916,6 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, - if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count)) - return false; - -- if (hlsl_version_lt(ctx, 4, 0)) -- { -- TRACE("Non-constant index propagation is not yet supported for SM1.\n"); -- return false; -- } -- - VKD3D_ASSERT(count); - - hlsl_block_init(&block); -@@ -1950,6 +1944,12 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, - else if (x != idx->src.var) - goto done; - -+ if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type != VKD3D_SHADER_TYPE_VERTEX) -+ { -+ TRACE("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name); -+ goto done; -+ } -+ - if (i == 0) - { - path_len = idx->src.path_len; -@@ -2184,6 +2184,9 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, - return false; - VKD3D_ASSERT(value->component == 0); - -+ /* A uniform object should have never been written to. */ -+ VKD3D_ASSERT(!deref->var->is_uniform); -+ - /* Only HLSL_IR_LOAD can produce an object. */ - load = hlsl_ir_load(value->node); - -@@ -2488,6 +2491,554 @@ enum validation_result - DEREF_VALIDATION_NOT_CONSTANT, - }; - -+struct vectorize_exprs_state -+{ -+ struct vectorizable_exprs_group -+ { -+ struct hlsl_block *block; -+ struct hlsl_ir_expr *exprs[4]; -+ uint8_t expr_count, component_count; -+ } *groups; -+ size_t count, capacity; -+}; -+ -+static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b) -+{ -+ /* TODO: We can also vectorize different constants. */ -+ -+ if (a->type == HLSL_IR_SWIZZLE) -+ a = hlsl_ir_swizzle(a)->val.node; -+ if (b->type == HLSL_IR_SWIZZLE) -+ b = hlsl_ir_swizzle(b)->val.node; -+ -+ return a == b; -+} -+ -+static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b) -+{ -+ if (a->op != b->op) -+ return false; -+ -+ for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j) -+ { -+ if (!a->operands[j].node) -+ break; -+ if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node)) -+ return false; -+ } -+ -+ return true; -+} -+ -+static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state) -+{ -+ if (expr->node.data_type->class > HLSL_CLASS_VECTOR) -+ return; -+ -+ /* These are the only current ops that are not per-component. */ -+ if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED -+ || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD) -+ return; -+ -+ for (size_t i = 0; i < state->count; ++i) -+ { -+ struct vectorizable_exprs_group *group = &state->groups[i]; -+ struct hlsl_ir_expr *other = group->exprs[0]; -+ -+ /* These are SSA instructions, which means they have the same value -+ * regardless of what block they're in. However, being in different -+ * blocks may mean that one expression or the other is not always -+ * executed. */ -+ -+ if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4 -+ && group->block == block -+ && is_same_vectorizable_expr(expr, other)) -+ { -+ group->exprs[group->expr_count++] = expr; -+ group->component_count += expr->node.data_type->e.numeric.dimx; -+ return; -+ } -+ } -+ -+ if (!hlsl_array_reserve(ctx, (void **)&state->groups, -+ &state->capacity, state->count + 1, sizeof(*state->groups))) -+ return; -+ state->groups[state->count].block = block; -+ state->groups[state->count].exprs[0] = expr; -+ state->groups[state->count].expr_count = 1; -+ state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx; -+ ++state->count; -+} -+ -+static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct vectorize_exprs_state *state) -+{ -+ struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->type == HLSL_IR_EXPR) -+ { -+ record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state); -+ } -+ else if (instr->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ -+ find_vectorizable_expr_groups(ctx, &iff->then_block, state); -+ find_vectorizable_expr_groups(ctx, &iff->else_block, state); -+ } -+ else if (instr->type == HLSL_IR_LOOP) -+ { -+ find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state); -+ } -+ else if (instr->type == HLSL_IR_SWITCH) -+ { -+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); -+ struct hlsl_ir_switch_case *c; -+ -+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -+ find_vectorizable_expr_groups(ctx, &c->body, state); -+ } -+ } -+} -+ -+/* Combine sequences like -+ * -+ * 3: @1.x -+ * 4: @2.x -+ * 5: @3 * @4 -+ * 6: @1.y -+ * 7: @2.x -+ * 8: @6 * @7 -+ * -+ * into -+ * -+ * 5_1: @1.xy -+ * 5_2: @2.xx -+ * 5_3: @5_1 * @5_2 -+ * 5: @5_3.x -+ * 8: @5_3.y -+ * -+ * Each operand to an expression needs to refer to the same ultimate source -+ * (in this case @1 and @2 respectively), but can be a swizzle thereof. -+ * -+ * In practice the swizzles @5 and @8 can generally then be vectorized again, -+ * either as part of another expression, or as part of a store. -+ */ -+static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) -+{ -+ struct vectorize_exprs_state state = {0}; -+ bool progress = false; -+ -+ find_vectorizable_expr_groups(ctx, block, &state); -+ -+ for (unsigned int i = 0; i < state.count; ++i) -+ { -+ struct vectorizable_exprs_group *group = &state.groups[i]; -+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; -+ uint32_t swizzles[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *arg, *combined; -+ unsigned int component_count = 0; -+ struct hlsl_type *combined_type; -+ struct hlsl_block new_block; -+ struct hlsl_ir_expr *expr; -+ -+ if (group->expr_count == 1) -+ continue; -+ -+ hlsl_block_init(&new_block); -+ -+ for (unsigned int j = 0; j < group->expr_count; ++j) -+ { -+ expr = group->exprs[j]; -+ -+ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) -+ { -+ uint32_t arg_swizzle; -+ -+ if (!(arg = expr->operands[a].node)) -+ break; -+ -+ if (arg->type == HLSL_IR_SWIZZLE) -+ arg_swizzle = hlsl_ir_swizzle(arg)->u.vector; -+ else -+ arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W); -+ -+ /* Mask out the invalid components. */ -+ arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1; -+ swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count); -+ } -+ -+ component_count += expr->node.data_type->e.numeric.dimx; -+ } -+ -+ expr = group->exprs[0]; -+ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) -+ { -+ if (!(arg = expr->operands[a].node)) -+ break; -+ if (arg->type == HLSL_IR_SWIZZLE) -+ arg = hlsl_ir_swizzle(arg)->val.node; -+ args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc); -+ } -+ -+ combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count); -+ combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc); -+ -+ list_move_before(&expr->node.entry, &new_block.instrs); -+ -+ TRACE("Combining %u %s instructions into %p.\n", group->expr_count, -+ debug_hlsl_expr_op(group->exprs[0]->op), combined); -+ -+ component_count = 0; -+ for (unsigned int j = 0; j < group->expr_count; ++j) -+ { -+ struct hlsl_ir_node *replacement; -+ -+ expr = group->exprs[j]; -+ -+ if (!(replacement = hlsl_new_swizzle(ctx, -+ HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count), -+ expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc))) -+ goto out; -+ component_count += expr->node.data_type->e.numeric.dimx; -+ list_add_before(&expr->node.entry, &replacement->entry); -+ hlsl_replace_node(&expr->node, replacement); -+ } -+ -+ progress = true; -+ } -+ -+out: -+ vkd3d_free(state.groups); -+ return progress; -+} -+ -+struct vectorize_stores_state -+{ -+ struct vectorizable_stores_group -+ { -+ struct hlsl_block *block; -+ /* We handle overlapping stores, because it's not really easier not to. -+ * In theory, then, we could collect an arbitrary number of stores here. -+ * -+ * In practice, overlapping stores are unlikely, and of course at most -+ * 4 stores can appear without overlap. Therefore, for simplicity, we -+ * just use a fixed array of 4. -+ * -+ * Since computing the writemask requires traversing the deref, and we -+ * need to do that anyway, we store it here for convenience. */ -+ struct hlsl_ir_store *stores[4]; -+ unsigned int path_len; -+ uint8_t writemasks[4]; -+ uint8_t store_count; -+ bool dirty; -+ } *groups; -+ size_t count, capacity; -+}; -+ -+/* This must be a store to a subsection of a vector. -+ * In theory we can also vectorize stores to packed struct fields, -+ * but this requires target-specific knowledge and is probably best left -+ * to a VSIR pass. */ -+static bool can_vectorize_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, -+ unsigned int *path_len, uint8_t *writemask) -+{ -+ struct hlsl_type *type = store->lhs.var->data_type; -+ unsigned int i; -+ -+ if (store->rhs.node->data_type->class > HLSL_CLASS_VECTOR) -+ return false; -+ -+ if (type->class == HLSL_CLASS_SCALAR) -+ return false; -+ -+ for (i = 0; type->class != HLSL_CLASS_VECTOR && i < store->lhs.path_len; ++i) -+ type = hlsl_get_element_type_from_path_index(ctx, type, store->lhs.path[i].node); -+ -+ if (type->class != HLSL_CLASS_VECTOR) -+ return false; -+ -+ *path_len = i; -+ -+ if (i < store->lhs.path_len) -+ { -+ struct hlsl_ir_constant *c; -+ -+ /* This is a store to a scalar component of a vector, achieved via -+ * indexing. */ -+ -+ if (store->lhs.path[i].node->type != HLSL_IR_CONSTANT) -+ return false; -+ c = hlsl_ir_constant(store->lhs.path[i].node); -+ *writemask = (1u << c->value.u[0].u); -+ } -+ else -+ { -+ *writemask = store->writemask; -+ } -+ -+ return true; -+} -+ -+static bool derefs_are_same_vector(struct hlsl_ctx *ctx, const struct hlsl_deref *a, const struct hlsl_deref *b) -+{ -+ struct hlsl_type *type = a->var->data_type; -+ -+ if (a->var != b->var) -+ return false; -+ -+ for (unsigned int i = 0; type->class != HLSL_CLASS_VECTOR && i < a->path_len && i < b->path_len; ++i) -+ { -+ if (a->path[i].node != b->path[i].node) -+ return false; -+ type = hlsl_get_element_type_from_path_index(ctx, type, a->path[i].node); -+ } -+ -+ return true; -+} -+ -+static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_store *store, struct vectorize_stores_state *state) -+{ -+ unsigned int path_len; -+ uint8_t writemask; -+ -+ if (!can_vectorize_store(ctx, store, &path_len, &writemask)) -+ { -+ /* In the case of a dynamically indexed vector, we must invalidate -+ * any groups that statically index the same vector. -+ * For the sake of expediency, we go one step further and invalidate -+ * any groups that store to the same variable. -+ * (We also don't check that that was the reason why this store isn't -+ * vectorizable.) -+ * We could be more granular, but we'll defer that until it comes -+ * up in practice. */ -+ for (size_t i = 0; i < state->count; ++i) -+ { -+ if (state->groups[i].stores[0]->lhs.var == store->lhs.var) -+ state->groups[i].dirty = true; -+ } -+ return; -+ } -+ -+ for (size_t i = 0; i < state->count; ++i) -+ { -+ struct vectorizable_stores_group *group = &state->groups[i]; -+ struct hlsl_ir_store *other = group->stores[0]; -+ -+ if (group->dirty) -+ continue; -+ -+ if (derefs_are_same_vector(ctx, &store->lhs, &other->lhs)) -+ { -+ /* Stores must be in the same CFG block. If they're not, -+ * they're not executed in exactly the same flow, and -+ * therefore can't be vectorized. */ -+ if (group->block == block -+ && is_same_vectorizable_source(store->rhs.node, other->rhs.node)) -+ { -+ if (group->store_count < ARRAY_SIZE(group->stores)) -+ { -+ group->stores[group->store_count] = store; -+ group->writemasks[group->store_count] = writemask; -+ ++group->store_count; -+ return; -+ } -+ } -+ else -+ { -+ /* A store to the same vector with a different source, or in -+ * a different CFG block, invalidates any earlier store. -+ * -+ * A store to a component which *contains* the vector in -+ * question would also invalidate, but we should have split all -+ * of those by the time we get here. */ -+ group->dirty = true; -+ -+ /* Note that we do exit this loop early if we find a store A we -+ * can vectorize with, but that's fine. If there was a store B -+ * also in the state that we can't vectorize with, it would -+ * already have invalidated A. */ -+ } -+ } -+ else -+ { -+ /* This could still be a store to the same vector, if e.g. the -+ * vector is part of a dynamically indexed array, or the path has -+ * two equivalent instructions which refer to the same component. -+ * [CSE may help with the latter, but we don't have it yet, -+ * and we shouldn't depend on it anyway.] -+ * For the sake of expediency, we just invalidate it if it refers -+ * to the same variable at all. -+ * As above, we could be more granular, but we'll defer that until -+ * it comes up in practice. */ -+ if (store->lhs.var == other->lhs.var) -+ group->dirty = true; -+ -+ /* As above, we don't need to worry about exiting the loop early. */ -+ } -+ } -+ -+ if (!hlsl_array_reserve(ctx, (void **)&state->groups, -+ &state->capacity, state->count + 1, sizeof(*state->groups))) -+ return; -+ state->groups[state->count].block = block; -+ state->groups[state->count].stores[0] = store; -+ state->groups[state->count].path_len = path_len; -+ state->groups[state->count].writemasks[0] = writemask; -+ state->groups[state->count].store_count = 1; -+ state->groups[state->count].dirty = false; -+ ++state->count; -+} -+ -+static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct vectorize_stores_state *state) -+{ -+ struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->type == HLSL_IR_STORE) -+ { -+ record_vectorizable_store(ctx, block, hlsl_ir_store(instr), state); -+ } -+ else if (instr->type == HLSL_IR_LOAD) -+ { -+ struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var; -+ -+ /* By vectorizing store A with store B, we are effectively moving -+ * store A down to happen at the same time as store B. -+ * If there was a load of the same variable between the two, this -+ * would be incorrect. -+ * Therefore invalidate all stores to this variable. As above, we -+ * could be more granular if necessary. */ -+ -+ for (unsigned int i = 0; i < state->count; ++i) -+ { -+ if (state->groups[i].stores[0]->lhs.var == var) -+ state->groups[i].dirty = true; -+ } -+ } -+ else if (instr->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ -+ find_vectorizable_store_groups(ctx, &iff->then_block, state); -+ find_vectorizable_store_groups(ctx, &iff->else_block, state); -+ } -+ else if (instr->type == HLSL_IR_LOOP) -+ { -+ find_vectorizable_store_groups(ctx, &hlsl_ir_loop(instr)->body, state); -+ } -+ else if (instr->type == HLSL_IR_SWITCH) -+ { -+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); -+ struct hlsl_ir_switch_case *c; -+ -+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -+ find_vectorizable_store_groups(ctx, &c->body, state); -+ } -+ } -+} -+ -+/* Combine sequences like -+ * -+ * 2: @1.yw -+ * 3: @1.zy -+ * 4: var.xy = @2 -+ * 5: var.yw = @3 -+ * -+ * to -+ * -+ * 2: @1.yzy -+ * 5: var.xyw = @2 -+ * -+ * There are a lot of gotchas here. We need to make sure the two stores are to -+ * the same vector (which may be embedded in a complex variable), that they're -+ * always executed in the same control flow, and that there aren't any other -+ * stores or loads on the same vector in the middle. */ -+static bool vectorize_stores(struct hlsl_ctx *ctx, struct hlsl_block *block) -+{ -+ struct vectorize_stores_state state = {0}; -+ bool progress = false; -+ -+ find_vectorizable_store_groups(ctx, block, &state); -+ -+ for (unsigned int i = 0; i < state.count; ++i) -+ { -+ struct vectorizable_stores_group *group = &state.groups[i]; -+ uint32_t new_swizzle = 0, new_writemask = 0; -+ struct hlsl_ir_node *new_rhs, *value; -+ uint32_t swizzle_components[4]; -+ unsigned int component_count; -+ struct hlsl_ir_store *store; -+ struct hlsl_block new_block; -+ -+ if (group->store_count == 1) -+ continue; -+ -+ hlsl_block_init(&new_block); -+ -+ /* Compute the swizzle components. */ -+ for (unsigned int j = 0; j < group->store_count; ++j) -+ { -+ unsigned int writemask = group->writemasks[j]; -+ uint32_t rhs_swizzle; -+ -+ store = group->stores[j]; -+ -+ if (store->rhs.node->type == HLSL_IR_SWIZZLE) -+ rhs_swizzle = hlsl_ir_swizzle(store->rhs.node)->u.vector; -+ else -+ rhs_swizzle = HLSL_SWIZZLE(X, Y, Z, W); -+ -+ component_count = 0; -+ for (unsigned int k = 0; k < 4; ++k) -+ { -+ if (writemask & (1u << k)) -+ swizzle_components[k] = hlsl_swizzle_get_component(rhs_swizzle, component_count++); -+ } -+ -+ new_writemask |= writemask; -+ } -+ -+ /* Construct the new swizzle. */ -+ component_count = 0; -+ for (unsigned int k = 0; k < 4; ++k) -+ { -+ if (new_writemask & (1u << k)) -+ hlsl_swizzle_set_component(&new_swizzle, component_count++, swizzle_components[k]); -+ } -+ -+ store = group->stores[0]; -+ value = store->rhs.node; -+ if (value->type == HLSL_IR_SWIZZLE) -+ value = hlsl_ir_swizzle(value)->val.node; -+ -+ new_rhs = hlsl_block_add_swizzle(ctx, &new_block, new_swizzle, component_count, value, &value->loc); -+ hlsl_block_add_store_parent(ctx, &new_block, &store->lhs, -+ group->path_len, new_rhs, new_writemask, &store->node.loc); -+ -+ TRACE("Combining %u stores to %s.\n", group->store_count, store->lhs.var->name); -+ -+ list_move_before(&group->stores[group->store_count - 1]->node.entry, &new_block.instrs); -+ -+ for (unsigned int j = 0; j < group->store_count; ++j) -+ { -+ list_remove(&group->stores[j]->node.entry); -+ hlsl_free_instr(&group->stores[j]->node); -+ } -+ -+ progress = true; -+ } -+ -+ vkd3d_free(state.groups); -+ return progress; -+} -+ - static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, - const struct hlsl_deref *deref) - { -@@ -3123,6 +3674,11 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc - return false; - } - -+static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) -+{ -+ return ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && deref->var->is_uniform; -+} -+ - /* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant - * index into multiple constant loads, where the value of only one of them ends up in the resulting - * node. -@@ -3149,6 +3705,9 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n - if (deref->path_len == 0) - return false; - -+ if (deref_supports_sm1_indirect_addressing(ctx, deref)) -+ return false; -+ - for (i = deref->path_len - 1; ; --i) - { - if (deref->path[i].node->type != HLSL_IR_CONSTANT) -@@ -7839,7 +8398,8 @@ static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, st - - if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) - return false; -- src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); -+ if (src_param->reg.dimension != VSIR_DIMENSION_NONE) -+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); - return true; - } - -@@ -7869,7 +8429,6 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_constant *constant) - { - struct hlsl_ir_node *instr = &constant->node; -- struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; - -@@ -7881,13 +8440,11 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - - src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; - src_param->reg.idx[0].offset = constant->reg.id; - src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); - -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->write_mask = instr->reg.writemask; -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - } - - static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, -@@ -7974,11 +8531,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; - dst_param->write_mask = 1u << i; - - src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = operand->reg.id; -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; - c = vsir_swizzle_get_component(src_swizzle, i); - src_param->swizzle = vsir_swizzle_from_writemask(1u << c); - } -@@ -7990,7 +8549,6 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi - { - struct hlsl_ir_node *operand = expr->operands[0].node; - struct hlsl_ir_node *instr = &expr->node; -- struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; - unsigned int src_count = 0; -@@ -8001,25 +8559,20 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) - return; - -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->write_mask = instr->reg.writemask; -- -- src_param = &ins->src[0]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = operand->reg.id; -- src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, VKD3DSP_WRITEMASK_ALL); - - if (ctx->profile->major_version < 3) - { - src_param = &ins->src[1]; - vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; - src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - - src_param = &ins->src[2]; - vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; - src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } -@@ -8341,19 +8894,68 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, - else - VKD3D_ASSERT(reg.allocated); - -- vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ if (type == VKD3DSPR_DEPTHOUT) -+ { -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); -+ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; -+ } -+ else -+ { -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = register_index; -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ } - dst_param->write_mask = writemask; -- dst_param->reg.idx[0].offset = register_index; - - if (deref->rel_offset.node) - hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); - } - -+static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_node *instr) -+{ -+ enum vkd3d_shader_opcode opcode = hlsl_version_ge(ctx, 2, 0) ? VKD3DSIH_MOVA : VKD3DSIH_MOV; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); -+ dst_param->write_mask = VKD3DSP_WRITEMASK_0; -+ -+ VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(instr->data_type->e.numeric.dimx == 1); -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, instr, VKD3DSP_WRITEMASK_ALL); -+} -+ -+static struct vkd3d_shader_src_param *sm1_generate_vsir_new_address_src(struct hlsl_ctx *ctx, -+ struct vsir_program *program) -+{ -+ struct vkd3d_shader_src_param *idx_src; -+ -+ if (!(idx_src = vsir_program_get_src_params(program, 1))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return NULL; -+ } -+ -+ memset(idx_src, 0, sizeof(*idx_src)); -+ vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); -+ idx_src->reg.dimension = VSIR_DIMENSION_VEC4; -+ idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ return idx_src; -+} -+ - static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, -- struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, -- unsigned int dst_writemask, const struct vkd3d_shader_location *loc) -+ struct vsir_program *program, struct vkd3d_shader_src_param *src_param, -+ struct hlsl_deref *deref, uint32_t dst_writemask, const struct vkd3d_shader_location *loc) - { - enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; -+ struct vkd3d_shader_src_param *src_rel_addr = NULL; - struct vkd3d_shader_version version; - uint32_t register_index; - unsigned int writemask; -@@ -8371,12 +8973,26 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, - } - else if (deref->var->is_uniform) - { -+ unsigned int offset = deref->const_offset; -+ - type = VKD3DSPR_CONST; -+ register_index = deref->var->regs[HLSL_REGSET_NUMERIC].id + offset / 4; - -- reg = hlsl_reg_from_deref(ctx, deref); -- register_index = reg.id; -- writemask = reg.writemask; -- VKD3D_ASSERT(reg.allocated); -+ writemask = 0xf & (0xf << (offset % 4)); -+ if (deref->var->regs[HLSL_REGSET_NUMERIC].writemask) -+ writemask = hlsl_combine_writemasks(deref->var->regs[HLSL_REGSET_NUMERIC].writemask, writemask); -+ -+ if (deref->rel_offset.node) -+ { -+ VKD3D_ASSERT(deref_supports_sm1_indirect_addressing(ctx, deref)); -+ -+ if (!(src_rel_addr = sm1_generate_vsir_new_address_src(ctx, program))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ } -+ VKD3D_ASSERT(deref->var->regs[HLSL_REGSET_NUMERIC].allocated); - } - else if (deref->var->is_input_semantic) - { -@@ -8408,32 +9024,30 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, - } - - vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; - src_param->reg.idx[0].offset = register_index; -+ src_param->reg.idx[0].rel_addr = src_rel_addr; - src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); -- -- if (deref->rel_offset.node) -- hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); - } - - static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, - struct hlsl_ir_load *load) - { - struct hlsl_ir_node *instr = &load->node; -- struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_instruction *ins; - - VKD3D_ASSERT(instr->reg.allocated); - -+ if (load->src.rel_offset.node) -+ sm1_generate_vsir_instr_mova(ctx, program, load->src.rel_offset.node); -+ - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) - return; - -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->write_mask = instr->reg.writemask; -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - -- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask, -- &ins->location); -+ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0], -+ &load->src, ins->dst[0].write_mask, &ins->location); - } - - static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, -@@ -8443,7 +9057,6 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - struct hlsl_ir_node *ddx = load->ddx.node; - struct hlsl_ir_node *ddy = load->ddy.node; - struct hlsl_ir_node *instr = &load->node; -- struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; - enum vkd3d_shader_opcode opcode; -@@ -8482,15 +9095,12 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - return; - ins->flags = flags; - -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->write_mask = instr->reg.writemask; -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - - src_param = &ins->src[0]; - vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); - -- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, -+ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], &load->resource, - VKD3DSP_WRITEMASK_ALL, &ins->location); - - if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) -@@ -8507,7 +9117,6 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) - { - struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; -- struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_instruction *ins; - uint32_t swizzle; -@@ -8517,11 +9126,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) - return; - -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->reg.dimension = VSIR_DIMENSION_VEC4; -- dst_param->write_mask = instr->reg.writemask; -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - - swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); - swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); -@@ -8557,7 +9162,6 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *condition = jump->condition.node; - struct hlsl_ir_node *instr = &jump->node; -- struct vkd3d_shader_dst_param *dst_param; - struct vkd3d_shader_instruction *ins; - - if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) -@@ -8565,10 +9169,7 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) - return; - -- dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- dst_param->reg.idx[0].offset = condition->reg.id; -- dst_param->write_mask = condition->reg.writemask; -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, condition); - } - else - { -@@ -8689,6 +9290,10 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - return; - } - -+ program->temp_count = allocate_temp_registers(ctx, entry_func); -+ if (ctx->result) -+ return; -+ - generate_vsir_signature(ctx, program, entry_func); - - hlsl_block_init(&block); -@@ -12532,6 +13137,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, - struct recursive_call_ctx recursive_call_ctx; - struct hlsl_ir_var *var; - unsigned int i; -+ bool progress; - - ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func; - -@@ -12709,6 +13315,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, - hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); - } - -+ compute_liveness(ctx, entry_func); -+ transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); -+ - loop_unrolling_execute(ctx, body); - hlsl_run_const_passes(ctx, body); - -@@ -12719,13 +13328,21 @@ static void process_entry_function(struct hlsl_ctx *ctx, - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - -- compute_liveness(ctx, entry_func); -- transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); -- - if (hlsl_version_lt(ctx, 4, 0)) - hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); - - hlsl_transform_ir(ctx, validate_dereferences, body, NULL); -+ -+ do -+ { -+ progress = vectorize_exprs(ctx, body); -+ compute_liveness(ctx, entry_func); -+ progress |= hlsl_transform_ir(ctx, dce, body, NULL); -+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -+ progress |= vectorize_stores(ctx, body); -+ } while (progress); -+ - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - - if (hlsl_version_ge(ctx, 4, 0)) -@@ -12847,7 +13464,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - if (profile->major_version < 4) - { - mark_indexable_vars(ctx, entry_func); -- allocate_temp_registers(ctx, entry_func); - allocate_const_registers(ctx, entry_func); - sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); - allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 3a784c71388..72cf53761e4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -470,6 +470,80 @@ static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_d - return false; - } - -+static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct vkd3d_shader_instruction *ins, *ins2; -+ unsigned int tmp_idx = ~0u; -+ unsigned int i, k, r; -+ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (ins->opcode == VKD3DSIH_MOV && ins->dst[0].reg.type == VKD3DSPR_ADDR) -+ { -+ if (tmp_idx == ~0u) -+ tmp_idx = program->temp_count++; -+ -+ ins->opcode = VKD3DSIH_FTOU; -+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->dst[0].reg.idx[0].offset = tmp_idx; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ } -+ else if (ins->opcode == VKD3DSIH_MOVA) -+ { -+ if (tmp_idx == ~0u) -+ tmp_idx = program->temp_count++; -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &program->instructions.elements[i]; -+ ins2 = &program->instructions.elements[i + 1]; -+ -+ ins->opcode = VKD3DSIH_ROUND_NE; -+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = tmp_idx; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ -+ if (!vsir_instruction_init_with_params(program, ins2, &ins->location, VKD3DSIH_FTOU, 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_register_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins2->dst[0].reg.idx[0].offset = tmp_idx; -+ ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins2->dst[0].write_mask = ins->dst[0].write_mask; -+ -+ vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ ins2->src[0].reg.idx[0].offset = tmp_idx; -+ ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins2->src[0].swizzle = vsir_swizzle_from_writemask(ins2->dst[0].write_mask); -+ } -+ -+ for (k = 0; k < ins->src_count; ++k) -+ { -+ struct vkd3d_shader_src_param *src = &ins->src[k]; -+ -+ for (r = 0; r < src->reg.idx_count; ++r) -+ { -+ struct vkd3d_shader_src_param *rel = src->reg.idx[r].rel_addr; -+ -+ if (rel && rel->reg.type == VKD3DSPR_ADDR) -+ { -+ if (tmp_idx == ~0u) -+ tmp_idx = program->temp_count++; -+ -+ vsir_register_init(&rel->reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ rel->reg.idx[0].offset = tmp_idx; -+ rel->reg.dimension = VSIR_DIMENSION_VEC4; -+ } -+ } -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, - struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, - struct vkd3d_shader_message_context *message_context) -@@ -482,6 +556,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, - - if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ifc = &instructions->elements[pos]; - - if (*tmp_idx == ~0u) - *tmp_idx = program->temp_count++; -@@ -535,6 +610,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program - - if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ texkill = &instructions->elements[pos]; - - if (*tmp_idx == ~0u) - *tmp_idx = program->temp_count++; -@@ -621,6 +697,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - - if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ mad = &instructions->elements[pos]; - - if (*tmp_idx == ~0u) - *tmp_idx = program->temp_count++; -@@ -665,6 +742,7 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog - - if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ sincos = &instructions->elements[pos]; - - ins = &instructions->elements[pos + 1]; - -@@ -717,6 +795,7 @@ static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, - - if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ tex = &instructions->elements[pos]; - - if (*tmp_idx == ~0u) - *tmp_idx = program->temp_count++; -@@ -1128,6 +1207,7 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra - if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; - ins = &program->instructions.elements[i]; -+ - vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); - vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); - ins->dst[0].reg.idx[0].offset = 0; -@@ -1346,7 +1426,6 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program - loc = ins->location; - if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- - ins = &program->instructions.elements[i]; - - for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) -@@ -2426,7 +2505,8 @@ struct flat_constants_normaliser - }; - - static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, -- enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) -+ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index, -+ struct vkd3d_shader_src_param **rel_addr) - { - static const struct - { -@@ -2446,12 +2526,8 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * - { - if (reg->type == regs[i].type) - { -- if (reg->idx[0].rel_addr) -- { -- FIXME("Unhandled relative address.\n"); -- return false; -- } -- -+ if (rel_addr) -+ *rel_addr = reg->idx[0].rel_addr; - *set = regs[i].set; - *index = reg->idx[0].offset; - return true; -@@ -2465,10 +2541,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par - const struct flat_constants_normaliser *normaliser) - { - enum vkd3d_shader_d3dbc_constant_register set; -+ struct vkd3d_shader_src_param *rel_addr; - uint32_t index; - size_t i, j; - -- if (!get_flat_constant_register_type(¶m->reg, &set, &index)) -+ if (!get_flat_constant_register_type(¶m->reg, &set, &index, &rel_addr)) - return; - - for (i = 0; i < normaliser->def_count; ++i) -@@ -2486,8 +2563,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par - - param->reg.type = VKD3DSPR_CONSTBUFFER; - param->reg.idx[0].offset = set; /* register ID */ -+ param->reg.idx[0].rel_addr = NULL; - param->reg.idx[1].offset = set; /* register index */ -+ param->reg.idx[1].rel_addr = NULL; - param->reg.idx[2].offset = index; /* buffer index */ -+ param->reg.idx[2].rel_addr = rel_addr; - param->reg.idx_count = 3; - } - -@@ -2514,7 +2594,7 @@ static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_progr - - def = &normaliser.defs[normaliser.def_count++]; - -- get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); -+ get_flat_constant_register_type(&ins->dst[0].reg, &def->set, &def->index, NULL); - for (j = 0; j < 4; ++j) - def->value[j] = ins->src[0].reg.u.immconst_u32[j]; - -@@ -6037,6 +6117,7 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ const struct vkd3d_shader_location loc = ret->location; - static const struct vkd3d_shader_location no_loc; - size_t pos = ret - instructions->elements; - struct vkd3d_shader_instruction *ins; -@@ -6061,9 +6142,10 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - { - if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = NULL; - ins = &program->instructions.elements[pos]; - -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1); - ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; - src_param_init_const_uint(&ins->src[0], 0); - -@@ -6073,20 +6155,20 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - - if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- -+ ret = NULL; - ins = &program->instructions.elements[pos]; - - switch (ref->data_type) - { - case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: -- vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); -+ vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].float_opcode, 1, 2); - src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); - src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], - VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); - break; - - case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: -- vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); -+ vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].uint_opcode, 1, 2); - src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); - src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], - VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); -@@ -6107,14 +6189,14 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); - - ++ins; -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1); - ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; - src_param_init_ssa_bool(&ins->src[0], program->ssa_count); - - ++program->ssa_count; - - ++ins; -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); - vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); - ins->dst[0].reg.idx[0].offset = colour_signature_idx; - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -@@ -6215,13 +6297,14 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog - uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos) - { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ const struct vkd3d_shader_location loc = ret->location; - size_t pos = ret - instructions->elements; - struct vkd3d_shader_instruction *ins; - unsigned int output_idx = 0; - - if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- -+ ret = NULL; - ins = &program->instructions.elements[pos]; - - for (unsigned int i = 0; i < 8; ++i) -@@ -6229,7 +6312,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog - if (!(mask & (1u << i))) - continue; - -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DP4, 1, 2); - src_param_init_temp_float4(&ins->src[0], position_temp); - src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); - ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -@@ -6247,7 +6330,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog - ++ins; - } - -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); - vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); - ins->dst[0].reg.idx[0].offset = position_signature_idx; - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -@@ -6404,15 +6487,16 @@ static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *progr - const struct vkd3d_shader_instruction *ret, size_t *ret_pos) - { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ const struct vkd3d_shader_location loc = ret->location; - size_t pos = ret - instructions->elements; - struct vkd3d_shader_instruction *ins; - - if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- -+ ret = NULL; - ins = &program->instructions.elements[pos]; - -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); - vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); - ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; - src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); -@@ -6541,9 +6625,9 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra - - if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &program->instructions.elements[i + 1]; - - loc = &program->instructions.elements[i].location; -- ins = &program->instructions.elements[i + 1]; - - if (min_parameter) - { -@@ -6741,7 +6825,6 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr - { - if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- - ins = &program->instructions.elements[insert_pos]; - - vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -@@ -6815,6 +6898,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro - */ - if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = NULL; -+ - *ret_pos = pos + 4; - - ssa_temp = program->ssa_count++; -@@ -6845,6 +6930,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro - */ - if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = NULL; -+ - *ret_pos = pos + 4; - - ssa_temp = program->ssa_count++; -@@ -6875,6 +6962,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro - */ - if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = NULL; -+ - *ret_pos = pos + 5; - - ssa_temp = program->ssa_count++; -@@ -7053,16 +7142,18 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr - { - const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ const struct vkd3d_shader_location loc = ret->location; - size_t pos = ret - instructions->elements; - struct vkd3d_shader_instruction *ins; - - if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = NULL; - - ins = &program->instructions.elements[pos]; - - /* Write the fog output. */ -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); - dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); - src_param_init_temp_float4(&ins->src[0], temp); - if (source == VKD3D_SHADER_FOG_SOURCE_Z) -@@ -7072,7 +7163,7 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr - ++ins; - - /* Write the position or specular output. */ -- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); - dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), - source_signature_idx, e->mask); - src_param_init_temp_float4(&ins->src[0], temp); -@@ -7707,6 +7798,33 @@ static void vsir_validate_label_register(struct validation_context *ctx, - reg->idx[0].offset, ctx->program->block_count); - } - -+static void vsir_validate_descriptor_indices(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg, enum vkd3d_shader_descriptor_type type, const char *name) -+{ -+ const struct vkd3d_shader_descriptor_info1 *descriptor; -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL indirect address for the ID of a register of type \"%s\".", name); -+ -+ if (!ctx->program->has_descriptor_info) -+ return; -+ -+ if (!(descriptor = vkd3d_shader_find_descriptor(&ctx->program->descriptors, type, reg->idx[0].offset))) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "No matching descriptor found for register %s%u.", name, reg->idx[0].offset); -+ return; -+ } -+ -+ if (!reg->idx[1].rel_addr && (reg->idx[1].offset < descriptor->register_index -+ || reg->idx[1].offset - descriptor->register_index >= descriptor->count)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Register index %u doesn't belong to the range [%u, %u] for register %s%u.", -+ reg->idx[1].offset, descriptor->register_index, -+ descriptor->register_index + descriptor->count - 1, name, reg->idx[0].offset); -+} -+ - static void vsir_validate_constbuffer_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) - { -@@ -7725,9 +7843,7 @@ static void vsir_validate_constbuffer_register(struct validation_context *ctx, - return; - } - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for a CONSTBUFFER register ID."); -+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, "cb"); - } - - static void vsir_validate_sampler_register(struct validation_context *ctx, -@@ -7753,9 +7869,7 @@ static void vsir_validate_sampler_register(struct validation_context *ctx, - return; - } - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for the descriptor index of a SAMPLER register."); -+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, "s"); - } - - static void vsir_validate_resource_register(struct validation_context *ctx, -@@ -7780,9 +7894,7 @@ static void vsir_validate_resource_register(struct validation_context *ctx, - return; - } - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for the descriptor index of a RESOURCE register."); -+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, "t"); - } - - static void vsir_validate_uav_register(struct validation_context *ctx, -@@ -7812,9 +7924,7 @@ static void vsir_validate_uav_register(struct validation_context *ctx, - return; - } - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for the descriptor index of a UAV register."); -+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, "u"); - } - - static void vsir_validate_ssa_register(struct validation_context *ctx, -@@ -9824,6 +9934,9 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - if (program->shader_version.major <= 2) - vsir_transform(&ctx, vsir_program_ensure_diffuse); - -+ if (program->shader_version.major < 4) -+ vsir_transform(&ctx, vsir_program_normalize_addr); -+ - if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - vsir_transform(&ctx, vsir_program_remap_output_signature); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 756b43298d3..a5d952cd525 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -1310,6 +1310,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - return ret; - - VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); -+ VKD3D_ASSERT(program->has_descriptor_info); - - if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0) - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 0cc1ceca798..91a6686eb0d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -11361,6 +11361,7 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, - return ret; - - VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); -+ VKD3D_ASSERT(program->has_descriptor_info); - - if (!(spirv_compiler = spirv_compiler_create(program, compile_info, - message_context, config_flags))) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 2afeff086e5..9191429c439 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1127,7 +1127,7 @@ static void vkd3d_shader_scan_combined_sampler_declaration( - &semantic->resource.range, semantic->resource_type, VKD3D_SHADER_RESOURCE_DATA_FLOAT); - } - --static const struct vkd3d_shader_descriptor_info1 *find_descriptor( -+const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( - const struct vkd3d_shader_scan_descriptor_info1 *info, - enum vkd3d_shader_descriptor_type type, unsigned int register_id) - { -@@ -1181,11 +1181,11 @@ static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_co - if (dynamic_resource || dynamic_sampler) - return; - -- if ((d = find_descriptor(context->scan_descriptor_info, -+ if ((d = vkd3d_shader_find_descriptor(context->scan_descriptor_info, - VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource->idx[0].offset))) - resource_space = d->register_space; - -- if (sampler && (d = find_descriptor(context->scan_descriptor_info, -+ if (sampler && (d = vkd3d_shader_find_descriptor(context->scan_descriptor_info, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler->idx[0].offset))) - sampler_space = d->register_space; - } -@@ -1606,6 +1606,9 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, - add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context); - -+ if (add_descriptor_info) -+ program->has_descriptor_info = true; -+ - if (TRACE_ON()) - vsir_program_trace(program); - -@@ -2046,6 +2049,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - VKD3D_SHADER_TARGET_SPIRV_BINARY, - #if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) - VKD3D_SHADER_TARGET_SPIRV_TEXT, -+#endif -+#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL -+ VKD3D_SHADER_TARGET_GLSL, - #endif - VKD3D_SHADER_TARGET_D3D_ASM, - VKD3D_SHADER_TARGET_D3D_BYTECODE, -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index e794257b9d8..bf794d5e936 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1444,6 +1444,9 @@ struct vkd3d_shader_scan_descriptor_info1 - unsigned int descriptor_count; - }; - -+const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( -+ const struct vkd3d_shader_scan_descriptor_info1 *info, -+ enum vkd3d_shader_descriptor_type type, unsigned int register_id); - void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info); - - struct vsir_program -@@ -1456,6 +1459,7 @@ struct vsir_program - struct shader_signature patch_constant_signature; - - struct vkd3d_shader_scan_descriptor_info1 descriptors; -+ bool has_descriptor_info; - - unsigned int parameter_count; - const struct vkd3d_shader_parameter1 *parameters; --- -2.47.2 -