diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch similarity index 70% rename from patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch rename to patches/vkd3d-latest/0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch index 33908dd1..6954a02f 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch @@ -1,36 +1,36 @@ -From ca62b0d06263c5a02f00bc215ec02d39b3c8e63d Mon Sep 17 00:00:00 2001 +From 0ccf563391895bc762112674fe5ab5fff4302a01 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 29 Nov 2024 07:14:57 +1100 -Subject: [PATCH] Updated vkd3d to a68fd1b0ded735580b0ec9025f75fe02d62966df. +Subject: [PATCH] Updated vkd3d to c7e173a1ffa1ba6916dd549bf9f32225440d1ec6. --- libs/vkd3d/include/private/vkd3d_common.h | 2 +- - libs/vkd3d/include/vkd3d_shader.h | 219 ++ + libs/vkd3d/include/vkd3d_shader.h | 219 + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 34 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 908 ++---- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 335 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1118 ++--- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 + - libs/vkd3d/libs/vkd3d-shader/dxil.c | 36 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 163 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 87 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 175 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 289 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 143 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 387 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 158 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 404 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2614 ++++++++++++++--- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 149 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1987 ++++++++++--- - libs/vkd3d/libs/vkd3d-shader/msl.c | 465 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 671 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 3839 ++++++++++++++--- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 363 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 2094 +++++++-- + libs/vkd3d/libs/vkd3d-shader/msl.c | 465 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 709 +++-- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 1561 ++-------- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 28 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 45 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 760 ++-- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 2232 +--------- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 34 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 86 +- libs/vkd3d/libs/vkd3d/command.c | 273 +- - libs/vkd3d/libs/vkd3d/device.c | 58 +- - libs/vkd3d/libs/vkd3d/state.c | 237 +- + libs/vkd3d/libs/vkd3d/device.c | 59 +- + libs/vkd3d/libs/vkd3d/state.c | 242 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 63 +- - 26 files changed, 6880 insertions(+), 3556 deletions(-) + 26 files changed, 8687 insertions(+), 5024 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index ec1dd70c9b2..fd62730f948 100644 @@ -309,7 +309,7 @@ index f60ef7db769..c2c6ad67804 100644 #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c 
b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 7c5444f63a3..8c96befadea 100644 +index 7c5444f63a3..69e14e0c7bf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] = @@ -330,7 +330,95 @@ index 7c5444f63a3..8c96befadea 100644 [VKD3DSIH_IGE ] = "ige", [VKD3DSIH_ILT ] = "ilt", [VKD3DSIH_IMAD ] = "imad", -@@ -815,7 +815,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, +@@ -354,6 +354,64 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_XOR ] = "xor", + }; + ++static const char * const shader_register_names[] = ++{ ++ [VKD3DSPR_ADDR ] = "a", ++ [VKD3DSPR_ATTROUT ] = "oD", ++ [VKD3DSPR_COLOROUT ] = "oC", ++ [VKD3DSPR_COMBINED_SAMPLER ] = "s", ++ [VKD3DSPR_CONST ] = "c", ++ [VKD3DSPR_CONSTBOOL ] = "b", ++ [VKD3DSPR_CONSTBUFFER ] = "cb", ++ [VKD3DSPR_CONSTINT ] = "i", ++ [VKD3DSPR_COVERAGE ] = "vCoverage", ++ [VKD3DSPR_DEPTHOUT ] = "oDepth", ++ [VKD3DSPR_DEPTHOUTGE ] = "oDepthGE", ++ [VKD3DSPR_DEPTHOUTLE ] = "oDepthLE", ++ [VKD3DSPR_FORKINSTID ] = "vForkInstanceId", ++ [VKD3DSPR_FUNCTIONBODY ] = "fb", ++ [VKD3DSPR_FUNCTIONPOINTER ] = "fp", ++ [VKD3DSPR_GROUPSHAREDMEM ] = "g", ++ [VKD3DSPR_GSINSTID ] = "vGSInstanceID", ++ [VKD3DSPR_IDXTEMP ] = "x", ++ [VKD3DSPR_IMMCONST ] = "l", ++ [VKD3DSPR_IMMCONST64 ] = "d", ++ [VKD3DSPR_IMMCONSTBUFFER ] = "icb", ++ [VKD3DSPR_INCONTROLPOINT ] = "vicp", ++ [VKD3DSPR_INPUT ] = "v", ++ [VKD3DSPR_JOININSTID ] = "vJoinInstanceId", ++ [VKD3DSPR_LABEL ] = "l", ++ [VKD3DSPR_LOCALTHREADID ] = "vThreadIDInGroup", ++ [VKD3DSPR_LOCALTHREADINDEX ] = "vThreadIDInGroupFlattened", ++ [VKD3DSPR_LOOP ] = "aL", ++ [VKD3DSPR_NULL ] = "null", ++ [VKD3DSPR_OUTCONTROLPOINT ] = "vocp", ++ [VKD3DSPR_OUTPOINTID ] = "vOutputControlPointID", ++ [VKD3DSPR_OUTPUT ] = "o", ++ [VKD3DSPR_OUTSTENCILREF ] = "oStencilRef", ++ [VKD3DSPR_PARAMETER ] = "parameter", ++ [VKD3DSPR_PATCHCONST ] = "vpc", ++ [VKD3DSPR_POINT_COORD ] = "vPointCoord", ++ [VKD3DSPR_PREDICATE ] = "p", ++ [VKD3DSPR_PRIMID ] = "primID", ++ [VKD3DSPR_RASTERIZER ] = "rasterizer", ++ [VKD3DSPR_RESOURCE ] = "t", ++ [VKD3DSPR_SAMPLEMASK ] = "oMask", ++ [VKD3DSPR_SAMPLER ] = "s", ++ [VKD3DSPR_SSA ] = "sr", ++ [VKD3DSPR_STREAM ] = "m", ++ [VKD3DSPR_TEMP ] = "r", ++ [VKD3DSPR_TESSCOORD ] = "vDomainLocation", ++ [VKD3DSPR_TEXCRDOUT ] = "oT", ++ [VKD3DSPR_TEXTURE ] = "t", ++ [VKD3DSPR_THREADGROUPID ] = "vThreadGroupID", ++ [VKD3DSPR_THREADID ] = "vThreadID", ++ [VKD3DSPR_UAV ] = "u", ++ [VKD3DSPR_UNDEF ] = "undef", ++ [VKD3DSPR_WAVELANECOUNT ] = "vWaveLaneCount", ++ [VKD3DSPR_WAVELANEINDEX ] = "vWaveLaneIndex", ++}; ++ + struct vkd3d_d3d_asm_colours + { + const char *reset; +@@ -377,22 +435,6 @@ struct vkd3d_d3d_asm_compiler + const struct vkd3d_shader_instruction *current; + }; + +-/* Convert floating point offset relative to a register file to an absolute +- * offset for float constants. 
*/ +-static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) +-{ +- switch (register_type) +- { +- case VKD3DSPR_CONST: return register_idx; +- case VKD3DSPR_CONST2: return 2048 + register_idx; +- case VKD3DSPR_CONST3: return 4096 + register_idx; +- case VKD3DSPR_CONST4: return 6144 + register_idx; +- default: +- FIXME("Unsupported register type: %u.\n", register_type); +- return register_idx; +- } +-} +- + static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) + { + unsigned int i; +@@ -815,7 +857,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, usage = "tessfactor"; break; case VKD3D_DECL_USAGE_POSITIONT: @@ -339,7 +427,265 @@ index 7c5444f63a3..8c96befadea 100644 indexed = true; break; case VKD3D_DECL_USAGE_FOG: -@@ -2547,6 +2547,33 @@ static void trace_signature(const struct shader_signature *signature, const char +@@ -966,82 +1008,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); + switch (reg->type) + { +- case VKD3DSPR_TEMP: +- vkd3d_string_buffer_printf(buffer, "r"); +- break; +- +- case VKD3DSPR_INPUT: +- vkd3d_string_buffer_printf(buffer, "v"); +- break; +- +- case VKD3DSPR_CONST: +- case VKD3DSPR_CONST2: +- case VKD3DSPR_CONST3: +- case VKD3DSPR_CONST4: +- vkd3d_string_buffer_printf(buffer, "c"); +- offset = shader_get_float_offset(reg->type, offset); +- break; +- +- case VKD3DSPR_TEXTURE: /* vs: case VKD3DSPR_ADDR */ +- vkd3d_string_buffer_printf(buffer, "%c", +- compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? 't' : 'a'); +- break; +- + case VKD3DSPR_RASTOUT: + vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); + break; + +- case VKD3DSPR_COLOROUT: +- vkd3d_string_buffer_printf(buffer, "oC"); +- break; +- +- case VKD3DSPR_DEPTHOUT: +- vkd3d_string_buffer_printf(buffer, "oDepth"); +- break; +- +- case VKD3DSPR_DEPTHOUTGE: +- vkd3d_string_buffer_printf(buffer, "oDepthGE"); +- break; +- +- case VKD3DSPR_DEPTHOUTLE: +- vkd3d_string_buffer_printf(buffer, "oDepthLE"); +- break; +- +- case VKD3DSPR_ATTROUT: +- vkd3d_string_buffer_printf(buffer, "oD"); +- break; +- +- case VKD3DSPR_TEXCRDOUT: +- /* Vertex shaders >= 3.0 use general purpose output registers +- * (VKD3DSPR_OUTPUT), which can include an address token. 
*/ +- if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) +- vkd3d_string_buffer_printf(buffer, "o"); +- else +- vkd3d_string_buffer_printf(buffer, "oT"); +- break; +- +- case VKD3DSPR_CONSTINT: +- vkd3d_string_buffer_printf(buffer, "i"); +- break; +- +- case VKD3DSPR_CONSTBOOL: +- vkd3d_string_buffer_printf(buffer, "b"); +- break; +- +- case VKD3DSPR_LABEL: +- vkd3d_string_buffer_printf(buffer, "l"); +- break; +- +- case VKD3DSPR_LOOP: +- vkd3d_string_buffer_printf(buffer, "aL"); +- break; +- +- case VKD3DSPR_COMBINED_SAMPLER: +- case VKD3DSPR_SAMPLER: +- vkd3d_string_buffer_printf(buffer, "s"); +- is_descriptor = true; +- break; +- + case VKD3DSPR_MISCTYPE: + if (offset > 1) + vkd3d_string_buffer_printf(buffer, "%s%s", +@@ -1050,156 +1020,20 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); + break; + +- case VKD3DSPR_PREDICATE: +- vkd3d_string_buffer_printf(buffer, "p"); +- break; +- +- case VKD3DSPR_IMMCONST: +- vkd3d_string_buffer_printf(buffer, "l"); +- break; +- +- case VKD3DSPR_IMMCONST64: +- vkd3d_string_buffer_printf(buffer, "d"); +- break; +- ++ case VKD3DSPR_COMBINED_SAMPLER: ++ case VKD3DSPR_SAMPLER: + case VKD3DSPR_CONSTBUFFER: +- vkd3d_string_buffer_printf(buffer, "cb"); +- is_descriptor = true; +- break; +- +- case VKD3DSPR_IMMCONSTBUFFER: +- vkd3d_string_buffer_printf(buffer, "icb"); +- break; +- +- case VKD3DSPR_PRIMID: +- vkd3d_string_buffer_printf(buffer, "primID"); +- break; +- +- case VKD3DSPR_NULL: +- vkd3d_string_buffer_printf(buffer, "null"); +- break; +- +- case VKD3DSPR_RASTERIZER: +- vkd3d_string_buffer_printf(buffer, "rasterizer"); +- break; +- + case VKD3DSPR_RESOURCE: +- vkd3d_string_buffer_printf(buffer, "t"); +- is_descriptor = true; +- break; +- + case VKD3DSPR_UAV: +- vkd3d_string_buffer_printf(buffer, "u"); + is_descriptor = true; +- break; +- +- case VKD3DSPR_OUTPOINTID: +- vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); +- break; +- +- case VKD3DSPR_FORKINSTID: +- vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); +- break; +- +- case VKD3DSPR_JOININSTID: +- vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); +- break; +- +- case VKD3DSPR_INCONTROLPOINT: +- vkd3d_string_buffer_printf(buffer, "vicp"); +- break; +- +- case VKD3DSPR_OUTCONTROLPOINT: +- vkd3d_string_buffer_printf(buffer, "vocp"); +- break; +- +- case VKD3DSPR_PATCHCONST: +- vkd3d_string_buffer_printf(buffer, "vpc"); +- break; +- +- case VKD3DSPR_TESSCOORD: +- vkd3d_string_buffer_printf(buffer, "vDomainLocation"); +- break; +- +- case VKD3DSPR_GROUPSHAREDMEM: +- vkd3d_string_buffer_printf(buffer, "g"); +- break; +- +- case VKD3DSPR_THREADID: +- vkd3d_string_buffer_printf(buffer, "vThreadID"); +- break; +- +- case VKD3DSPR_THREADGROUPID: +- vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); +- break; +- +- case VKD3DSPR_LOCALTHREADID: +- vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); +- break; +- +- case VKD3DSPR_LOCALTHREADINDEX: +- vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); +- break; +- +- case VKD3DSPR_IDXTEMP: +- vkd3d_string_buffer_printf(buffer, "x"); +- break; +- +- case VKD3DSPR_STREAM: +- vkd3d_string_buffer_printf(buffer, "m"); +- break; +- +- case VKD3DSPR_FUNCTIONBODY: +- vkd3d_string_buffer_printf(buffer, "fb"); +- break; +- +- case VKD3DSPR_FUNCTIONPOINTER: +- vkd3d_string_buffer_printf(buffer, "fp"); +- break; +- +- case VKD3DSPR_COVERAGE: +- vkd3d_string_buffer_printf(buffer, "vCoverage"); +- break; +- +- case 
VKD3DSPR_SAMPLEMASK: +- vkd3d_string_buffer_printf(buffer, "oMask"); +- break; +- +- case VKD3DSPR_GSINSTID: +- vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); +- break; +- +- case VKD3DSPR_OUTSTENCILREF: +- vkd3d_string_buffer_printf(buffer, "oStencilRef"); +- break; +- +- case VKD3DSPR_UNDEF: +- vkd3d_string_buffer_printf(buffer, "undef"); +- break; +- +- case VKD3DSPR_SSA: +- vkd3d_string_buffer_printf(buffer, "sr"); +- break; +- +- case VKD3DSPR_WAVELANECOUNT: +- vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); +- break; +- +- case VKD3DSPR_WAVELANEINDEX: +- vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); +- break; +- +- case VKD3DSPR_PARAMETER: +- vkd3d_string_buffer_printf(buffer, "parameter"); +- break; +- +- case VKD3DSPR_POINT_COORD: +- vkd3d_string_buffer_printf(buffer, "vPointCoord"); +- break; ++ /* fall through */ + + default: +- vkd3d_string_buffer_printf(buffer, "%s%s", +- compiler->colours.error, reg->type, compiler->colours.reset); ++ if (reg->type < ARRAY_SIZE(shader_register_names) && shader_register_names[reg->type]) ++ vkd3d_string_buffer_printf(buffer, "%s", shader_register_names[reg->type]); ++ else ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->type, compiler->colours.reset); + break; + } + +@@ -2132,8 +1966,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + + case VKD3DSIH_DEF: + vkd3d_string_buffer_printf(buffer, " %sc%u%s", compiler->colours.reg, +- shader_get_float_offset(ins->dst[0].reg.type, ins->dst[0].reg.idx[0].offset), +- compiler->colours.reset); ++ ins->dst[0].reg.idx[0].offset, compiler->colours.reset); + shader_print_float_literal(compiler, " = ", ins->src[0].reg.u.immconst_f32[0], ""); + shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[1], ""); + shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[2], ""); +@@ -2547,6 +2380,33 @@ static void trace_signature(const struct shader_signature *signature, const char vkd3d_string_buffer_cleanup(&buffer); } @@ -373,7 +719,7 @@ index 7c5444f63a3..8c96befadea 100644 void vsir_program_trace(const struct vsir_program *program) { const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; -@@ -2556,6 +2583,7 @@ void vsir_program_trace(const struct vsir_program *program) +@@ -2556,6 +2416,7 @@ void vsir_program_trace(const struct vsir_program *program) trace_signature(&program->input_signature, "Input"); trace_signature(&program->output_signature, "Output"); trace_signature(&program->patch_constant_signature, "Patch-constant"); @@ -382,7 +728,7 @@ index 7c5444f63a3..8c96befadea 100644 if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) return; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 9e2eacbcfa6..a931883e8d1 100644 +index 9e2eacbcfa6..58e35cf22e8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -8,7 +8,7 @@ @@ -403,7 +749,40 @@ index 9e2eacbcfa6..a931883e8d1 100644 #define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu -@@ -235,7 +235,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = +@@ -89,6 +89,32 @@ + #define VKD3D_SM1_VERSION_MAJOR(version) (((version) >> 8u) & 0xffu) + #define VKD3D_SM1_VERSION_MINOR(version) (((version) >> 0u) & 0xffu) + ++enum vkd3d_sm1_register_type ++{ ++ VKD3D_SM1_REG_TEMP = 0x00, ++ VKD3D_SM1_REG_INPUT = 0x01, ++ VKD3D_SM1_REG_CONST = 0x02, ++ VKD3D_SM1_REG_ADDR = 0x03, ++ VKD3D_SM1_REG_TEXTURE = 0x03, ++ 
VKD3D_SM1_REG_RASTOUT = 0x04, ++ VKD3D_SM1_REG_ATTROUT = 0x05, ++ VKD3D_SM1_REG_TEXCRDOUT = 0x06, ++ VKD3D_SM1_REG_OUTPUT = 0x06, ++ VKD3D_SM1_REG_CONSTINT = 0x07, ++ VKD3D_SM1_REG_COLOROUT = 0x08, ++ VKD3D_SM1_REG_DEPTHOUT = 0x09, ++ VKD3D_SM1_REG_SAMPLER = 0x0a, ++ VKD3D_SM1_REG_CONST2 = 0x0b, ++ VKD3D_SM1_REG_CONST3 = 0x0c, ++ VKD3D_SM1_REG_CONST4 = 0x0d, ++ VKD3D_SM1_REG_CONSTBOOL = 0x0e, ++ VKD3D_SM1_REG_LOOP = 0x0f, ++ VKD3D_SM1_REG_TEMPFLOAT16 = 0x10, ++ VKD3D_SM1_REG_MISCTYPE = 0x11, ++ VKD3D_SM1_REG_LABEL = 0x12, ++ VKD3D_SM1_REG_PREDICATE = 0x13, ++}; ++ + enum vkd3d_sm1_address_mode_type + { + VKD3D_SM1_ADDRESS_MODE_ABSOLUTE = 0x0, +@@ -235,7 +261,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = /* Arithmetic */ {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, {VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, @@ -412,7 +791,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, -@@ -248,22 +248,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = +@@ -248,22 +274,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, @@ -444,7 +823,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 /* Matrix */ {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, -@@ -274,27 +274,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = +@@ -274,27 +300,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, /* Constant definitions */ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, @@ -492,7 +871,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 {0, 0, 0, VKD3DSIH_INVALID}, }; -@@ -307,89 +307,84 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = +@@ -307,92 +333,115 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, @@ -652,7 +1031,162 @@ index 9e2eacbcfa6..a931883e8d1 100644 {0, 0, 0, VKD3DSIH_INVALID}, }; -@@ -638,7 +633,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, ++static const struct ++{ ++ enum vkd3d_sm1_register_type d3dbc_type; ++ enum vkd3d_shader_register_type vsir_type; ++} ++register_types[] = ++{ ++ {VKD3D_SM1_REG_TEMP, VKD3DSPR_TEMP}, ++ {VKD3D_SM1_REG_INPUT, VKD3DSPR_INPUT}, ++ {VKD3D_SM1_REG_CONST, VKD3DSPR_CONST}, ++ {VKD3D_SM1_REG_ADDR, VKD3DSPR_ADDR}, ++ {VKD3D_SM1_REG_TEXTURE, VKD3DSPR_TEXTURE}, ++ {VKD3D_SM1_REG_RASTOUT, VKD3DSPR_RASTOUT}, ++ {VKD3D_SM1_REG_ATTROUT, VKD3DSPR_ATTROUT}, ++ {VKD3D_SM1_REG_OUTPUT, VKD3DSPR_OUTPUT}, ++ {VKD3D_SM1_REG_TEXCRDOUT, VKD3DSPR_TEXCRDOUT}, ++ {VKD3D_SM1_REG_CONSTINT, VKD3DSPR_CONSTINT}, ++ {VKD3D_SM1_REG_COLOROUT, VKD3DSPR_COLOROUT}, ++ {VKD3D_SM1_REG_DEPTHOUT, VKD3DSPR_DEPTHOUT}, ++ {VKD3D_SM1_REG_SAMPLER, VKD3DSPR_COMBINED_SAMPLER}, ++ {VKD3D_SM1_REG_CONSTBOOL, VKD3DSPR_CONSTBOOL}, ++ {VKD3D_SM1_REG_LOOP, VKD3DSPR_LOOP}, ++ {VKD3D_SM1_REG_TEMPFLOAT16, VKD3DSPR_TEMPFLOAT16}, ++ {VKD3D_SM1_REG_MISCTYPE, VKD3DSPR_MISCTYPE}, ++ {VKD3D_SM1_REG_LABEL, VKD3DSPR_LABEL}, ++ {VKD3D_SM1_REG_PREDICATE, VKD3DSPR_PREDICATE}, ++}; ++ + static const enum vkd3d_shader_resource_type resource_type_table[] = + { + /* VKD3D_SM1_RESOURCE_UNKNOWN */ VKD3D_SHADER_RESOURCE_NONE, +@@ -458,6 +507,7 @@ static unsigned int idx_count_from_reg_type(enum 
vkd3d_shader_register_type reg_ + switch (reg_type) + { + case VKD3DSPR_DEPTHOUT: ++ case VKD3DSPR_ADDR: + return 0; + + default: +@@ -465,52 +515,82 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_ + } + } + +-static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, +- struct vkd3d_shader_src_param *src) ++static enum vkd3d_shader_register_type parse_register_type( ++ struct vkd3d_shader_sm1_parser *sm1, uint32_t param, unsigned int *index_offset) + { +- enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) ++ enum vkd3d_sm1_register_type d3dbc_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) + | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); +- unsigned int idx_count = idx_count_from_reg_type(reg_type); + +- vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); +- src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; +- src->reg.non_uniform = false; +- if (idx_count == 1) ++ *index_offset = 0; ++ ++ if (d3dbc_type == VKD3D_SM1_REG_CONST2) + { +- src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; +- src->reg.idx[0].rel_addr = rel_addr; ++ *index_offset = 2048; ++ return VKD3DSPR_CONST; + } +- if (src->reg.type == VKD3DSPR_SAMPLER) +- src->reg.dimension = VSIR_DIMENSION_NONE; +- else if (src->reg.type == VKD3DSPR_DEPTHOUT) +- src->reg.dimension = VSIR_DIMENSION_SCALAR; +- else +- src->reg.dimension = VSIR_DIMENSION_VEC4; +- src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); +- src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; ++ ++ if (d3dbc_type == VKD3D_SM1_REG_CONST3) ++ { ++ *index_offset = 4096; ++ return VKD3DSPR_CONST; ++ } ++ ++ if (d3dbc_type == VKD3D_SM1_REG_CONST4) ++ { ++ *index_offset = 6144; ++ return VKD3DSPR_CONST; ++ } ++ ++ if (d3dbc_type == VKD3D_SM1_REG_ADDR) ++ return sm1->p.program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? VKD3DSPR_TEXTURE : VKD3DSPR_ADDR; ++ if (d3dbc_type == VKD3D_SM1_REG_TEXCRDOUT) ++ return vkd3d_shader_ver_ge(&sm1->p.program->shader_version, 3, 0) ? 
VKD3DSPR_OUTPUT : VKD3DSPR_TEXCRDOUT; ++ ++ for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) ++ { ++ if (register_types[i].d3dbc_type == d3dbc_type) ++ return register_types[i].vsir_type; ++ } ++ ++ return VKD3DSPR_INVALID; + } + +-static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, +- struct vkd3d_shader_dst_param *dst) ++static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc, ++ struct vkd3d_shader_register *reg, uint32_t param, struct vkd3d_shader_src_param *rel_addr) + { +- enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) +- | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); +- unsigned int idx_count = idx_count_from_reg_type(reg_type); ++ enum vkd3d_shader_register_type reg_type; ++ unsigned int index_offset, idx_count; + +- vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); +- dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; +- dst->reg.non_uniform = false; ++ reg_type = parse_register_type(d3dbc, param, &index_offset); ++ idx_count = idx_count_from_reg_type(reg_type); ++ vsir_register_init(reg, reg_type, VKD3D_DATA_FLOAT, idx_count); ++ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; ++ reg->non_uniform = false; + if (idx_count == 1) + { +- dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; +- dst->reg.idx[0].rel_addr = rel_addr; ++ reg->idx[0].offset = index_offset + (param & VKD3D_SM1_REGISTER_NUMBER_MASK); ++ reg->idx[0].rel_addr = rel_addr; + } +- if (dst->reg.type == VKD3DSPR_SAMPLER) +- dst->reg.dimension = VSIR_DIMENSION_NONE; +- else if (dst->reg.type == VKD3DSPR_DEPTHOUT) +- dst->reg.dimension = VSIR_DIMENSION_SCALAR; ++ if (reg->type == VKD3DSPR_SAMPLER) ++ reg->dimension = VSIR_DIMENSION_NONE; ++ else if (reg->type == VKD3DSPR_DEPTHOUT) ++ reg->dimension = VSIR_DIMENSION_SCALAR; + else +- dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++} ++ ++static void shader_sm1_parse_src_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param, ++ struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_src_param *src) ++{ ++ d3dbc_parse_register(sm1, &src->reg, param, rel_addr); ++ src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); ++ src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; ++} ++ ++static void shader_sm1_parse_dst_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param, ++ struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_dst_param *dst) ++{ ++ d3dbc_parse_register(sm1, &dst->reg, param, rel_addr); + dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; + dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; + +@@ -638,7 +718,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, return; } @@ -685,7 +1219,79 @@ index 9e2eacbcfa6..a931883e8d1 100644 } static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, -@@ -968,6 +988,8 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const +@@ -666,26 +771,18 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * + VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); + + case VKD3DSPR_TEXTURE: +- /* For vertex shaders, this is ADDR. 
*/ +- if (version->type == VKD3D_SHADER_TYPE_VERTEX) +- return true; + return add_signature_element(sm1, false, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + ++ case VKD3DSPR_TEXCRDOUT: ++ return add_signature_element(sm1, true, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ + case VKD3DSPR_OUTPUT: + if (version->type == VKD3D_SHADER_TYPE_VERTEX) + { +- /* For sm < 2 vertex shaders, this is TEXCRDOUT. +- * +- * For sm3 vertex shaders, this is OUTPUT, but we already +- * should have had a DCL instruction. */ +- if (version->major == 3) +- { +- add_signature_mask(sm1, true, register_index, mask); +- return true; +- } +- return add_signature_element(sm1, true, "TEXCOORD", register_index, +- VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ add_signature_mask(sm1, true, register_index, mask); ++ return true; + } + /* fall through */ + +@@ -822,18 +919,6 @@ static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, + record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); + break; + +- case VKD3DSPR_CONST2: +- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); +- break; +- +- case VKD3DSPR_CONST3: +- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); +- break; +- +- case VKD3DSPR_CONST4: +- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); +- break; +- + case VKD3DSPR_CONSTINT: + record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); + break; +@@ -941,9 +1026,9 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const + sm1->abort = true; + return; + } +- shader_sm1_parse_src_param(addr_token, NULL, src_rel_addr); ++ shader_sm1_parse_src_param(sm1, addr_token, NULL, src_rel_addr); + } +- shader_sm1_parse_src_param(token, src_rel_addr, src_param); ++ shader_sm1_parse_src_param(sm1, token, src_rel_addr, src_param); + } + + static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, +@@ -962,12 +1047,14 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const + sm1->abort = true; + return; + } +- shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); ++ shader_sm1_parse_src_param(sm1, addr_token, NULL, dst_rel_addr); + } +- shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); ++ shader_sm1_parse_dst_param(sm1, token, dst_rel_addr, dst_param); if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) sm1->p.program->has_point_size = true; @@ -694,7 +1300,16 @@ index 9e2eacbcfa6..a931883e8d1 100644 } static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, -@@ -1268,6 +1290,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st +@@ -1005,7 +1092,7 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, + semantic->resource_data_type[1] = VKD3D_DATA_FLOAT; + semantic->resource_data_type[2] = VKD3D_DATA_FLOAT; + semantic->resource_data_type[3] = VKD3D_DATA_FLOAT; +- shader_sm1_parse_dst_param(dst_token, NULL, &semantic->resource.reg); ++ shader_sm1_parse_dst_param(sm1, dst_token, NULL, &semantic->resource.reg); + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = 
semantic->resource.reg.reg.idx[0].offset; +@@ -1268,6 +1355,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; @@ -702,7 +1317,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 const uint32_t *code = compile_info->source.code; size_t code_size = compile_info->source.size; struct vkd3d_shader_version version; -@@ -1318,9 +1341,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st +@@ -1318,9 +1406,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->start = &code[1]; sm1->end = &code[token_count]; @@ -717,7 +1332,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -@@ -1525,387 +1552,73 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns +@@ -1525,555 +1617,208 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS); } @@ -1013,10 +1628,13 @@ index 9e2eacbcfa6..a931883e8d1 100644 - } - - sm1_sort_externs(ctx); -- ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; ++ size_t offset, start, end; + - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); -- ++ offset = put_u32(buffer, 0); + - ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */ - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); @@ -1024,12 +1642,20 @@ index 9e2eacbcfa6..a931883e8d1 100644 - vars_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ -- ++ start = put_u32(buffer, tag); ++ bytecode_put_bytes(buffer, comment->code, comment->size); ++ end = bytecode_align(buffer); + - vars_start = bytecode_align(buffer); - set_u32(buffer, vars_offset, vars_start - ctab_start); -- ++ set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); ++} + - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { ++static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct vkd3d_shader_register *reg) ++{ ++ if (reg->type == VKD3DSPR_CONST) + { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) @@ -1051,12 +1677,19 @@ index 9e2eacbcfa6..a931883e8d1 100644 - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* default value */ - } -- } -- ++ if (reg->idx[0].offset >= 6144) ++ return VKD3D_SM1_REG_CONST4; ++ if (reg->idx[0].offset >= 4096) ++ return VKD3D_SM1_REG_CONST3; ++ if (reg->idx[0].offset >= 2048) ++ return VKD3D_SM1_REG_CONST2; + } + - uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { ++ for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) + { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) @@ -1130,28 +1763,30 @@ index 9e2eacbcfa6..a931883e8d1 100644 - } - } - } -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -+ size_t offset, start, end; - +- - ++uniform_count; - } -- } -+ offset = put_u32(buffer, 0); ++ if (register_types[i].vsir_type == reg->type) ++ return register_types[i].d3dbc_type; + 
} - offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); -+ start = put_u32(buffer, tag); -+ bytecode_put_bytes(buffer, comment->code, comment->size); -+ end = bytecode_align(buffer); - +- - ctab_end = bytecode_align(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -+ set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); ++ vkd3d_unreachable(); } - static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) -@@ -1914,166 +1627,108 @@ static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) - | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); +-static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) ++static uint32_t sm1_encode_register_type(const struct vkd3d_shader_register *reg) + { +- return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) +- | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); ++ enum vkd3d_sm1_register_type sm1_type = d3dbc_register_type_from_vsir(reg); ++ ++ return ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) ++ | ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); } -struct sm1_instruction @@ -1235,9 +1870,10 @@ index 9e2eacbcfa6..a931883e8d1 100644 - | sm1_encode_register_type(reg->type) - | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) - | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); -+ | sm1_encode_register_type(reg->reg.type) ++ | sm1_encode_register_type(®->reg) + | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) -+ | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg.idx[0].offset); ++ | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) ++ | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); } -static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, @@ -1248,9 +1884,10 @@ index 9e2eacbcfa6..a931883e8d1 100644 - | sm1_encode_register_type(reg->type) - | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) - | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); -+ | sm1_encode_register_type(reg->reg.type) ++ | sm1_encode_register_type(®->reg) + | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) -+ | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg.idx[0].offset); ++ | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) ++ | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); } -static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) @@ -1294,7 +1931,8 @@ index 9e2eacbcfa6..a931883e8d1 100644 - unsigned int i = 0; - - for (;;) -- { ++ for (i = 0; i < ins->dst_count; ++i) + { - info = &d3dbc->opcode_table[i++]; - if (info->vkd3d_opcode == VKD3DSIH_INVALID) - return NULL; @@ -1332,8 +1970,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 - src->swizzle = swizzle_from_vsir(param->swizzle); - - if (param->reg.idx[0].rel_addr) -+ for (i = 0; i < ins->dst_count; ++i) - { +- { - vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, - "Unhandled relative addressing on source register."); - d3dbc->failed = true; @@ -1374,7 +2011,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { -@@ -2081,11 +1736,11 @@ static void d3dbc_write_vsir_def(struct 
d3dbc_compiler *d3dbc, const struct vkd3 +@@ -2081,11 +1826,11 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; uint32_t token; @@ -1390,7 +2027,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 }; token = VKD3D_SM1_OP_DEF; -@@ -2103,7 +1758,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, +@@ -2103,7 +1848,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; @@ -1399,7 +2036,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 uint32_t token; token = VKD3D_SM1_OP_DCL; -@@ -2115,9 +1770,9 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, +@@ -2115,9 +1860,9 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); @@ -1412,7 +2049,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 write_sm1_dst_register(buffer, ®); } -@@ -2163,61 +1818,6 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3 +@@ -2163,61 +1908,6 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3 } } @@ -1474,7 +2111,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { uint32_t writemask; -@@ -2254,7 +1854,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str +@@ -2254,7 +1944,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VKD3DSIH_TEX: case VKD3DSIH_TEXKILL: case VKD3DSIH_TEXLDD: @@ -1483,7 +2120,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 break; case VKD3DSIH_EXP: -@@ -2271,7 +1871,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str +@@ -2271,7 +1961,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str writemask, ins->opcode); d3dbc->failed = true; } @@ -1492,7 +2129,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 break; default: -@@ -2287,13 +1887,13 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, +@@ -2287,13 +1977,13 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; @@ -1508,7 +2145,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 { usage = 0; usage_idx = 0; -@@ -2302,8 +1902,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, +@@ -2302,8 +1992,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, { ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); VKD3D_ASSERT(ret); @@ -1519,7 +2156,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 } token = VKD3D_SM1_OP_DCL; -@@ -2316,7 +1916,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, +@@ -2316,7 +2006,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; put_u32(buffer, token); @@ -1528,7 +2165,7 @@ index 9e2eacbcfa6..a931883e8d1 100644 write_sm1_dst_register(buffer, ®); } -@@ -2384,9 +1984,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, +@@ -2384,9 +2074,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, } 
put_u32(buffer, sm1_version(version->type, version->major, version->minor)); @@ -1600,10 +2237,19 @@ index f6ac8e0829e..81af62f7810 100644 TRACE("Skipping AON9 shader code chunk.\n"); break; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 7099bcc9ce2..4493602dfb7 100644 +index 7099bcc9ce2..399c2b67eae 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -3824,7 +3824,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par +@@ -430,6 +430,8 @@ enum dx_intrinsic_opcode + DX_DERIV_COARSEY = 84, + DX_DERIV_FINEX = 85, + DX_DERIV_FINEY = 86, ++ DX_EVAL_SAMPLE_INDEX = 88, ++ DX_EVAL_CENTROID = 89, + DX_SAMPLE_INDEX = 90, + DX_COVERAGE = 91, + DX_THREAD_ID = 93, +@@ -3824,7 +3826,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par } static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( @@ -1612,7 +2258,7 @@ index 7099bcc9ce2..4493602dfb7 100644 { switch (sysval_semantic) { -@@ -3834,7 +3834,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( +@@ -3834,7 +3836,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( case VKD3D_SHADER_SV_SAMPLE_INDEX: return VKD3DSPR_NULL; case VKD3D_SHADER_SV_COVERAGE: @@ -1621,7 +2267,7 @@ index 7099bcc9ce2..4493602dfb7 100644 case VKD3D_SHADER_SV_DEPTH: return VKD3DSPR_DEPTHOUT; case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -@@ -3884,7 +3884,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade +@@ -3884,7 +3886,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade param = ¶ms[i]; if (e->register_index == UINT_MAX @@ -1630,7 +2276,70 @@ index 7099bcc9ce2..4493602dfb7 100644 { dst_param_io_init(param, e, io_reg_type); continue; -@@ -9348,7 +9348,7 @@ static void signature_element_read_additional_element_values(struct signature_el +@@ -5098,6 +5100,53 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opc + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_params; ++ const struct shader_signature *signature; ++ unsigned int row_index, column_index; ++ const struct signature_element *e; ++ ++ row_index = sm6_value_get_constant_uint(operands[0]); ++ column_index = sm6_value_get_constant_uint(operands[2]); ++ ++ signature = &sm6->p.program->input_signature; ++ if (row_index >= signature->element_count) ++ { ++ WARN("Invalid row index %u.\n", row_index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid input row index %u for an attribute evaluation.", row_index); ++ return; ++ } ++ ++ e = &signature->elements[row_index]; ++ if (column_index >= VKD3D_VEC4_SIZE || !(e->mask & (1 << column_index))) ++ { ++ WARN("Invalid column index %u.\n", column_index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid input column index %u for an attribute evaluation.", column_index); ++ return; ++ } ++ ++ vsir_instruction_init(ins, &sm6->p.location, (op == DX_EVAL_CENTROID) ++ ? 
VKD3DSIH_EVAL_CENTROID : VKD3DSIH_EVAL_SAMPLE_INDEX); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, 1 + (op == DX_EVAL_SAMPLE_INDEX), sm6))) ++ return; ++ ++ src_params[0].reg = sm6->input_params[row_index].reg; ++ src_param_init_scalar(&src_params[0], column_index); ++ if (e->register_count > 1) ++ register_index_address_init(&src_params[0].reg.idx[0], operands[1], sm6); ++ ++ if (op == DX_EVAL_SAMPLE_INDEX) ++ src_param_init_from_value(&src_params[1], operands[3]); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -6288,6 +6337,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, + [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, + [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, ++ [DX_EVAL_CENTROID ] = {"o", "cic", sm6_parser_emit_dx_eval_attrib}, ++ [DX_EVAL_SAMPLE_INDEX ] = {"o", "cici", sm6_parser_emit_dx_eval_attrib}, + [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, + [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, +@@ -9348,7 +9399,7 @@ static void signature_element_read_additional_element_values(struct signature_el } static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, @@ -1639,7 +2348,7 @@ index 7099bcc9ce2..4493602dfb7 100644 { unsigned int i, j, column_count, operand_count, index; const struct sm6_metadata_node *node, *element_node; -@@ -9466,7 +9466,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9466,7 +9517,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if ((is_register = e->register_index == UINT_MAX)) { @@ -1648,7 +2357,7 @@ index 7099bcc9ce2..4493602dfb7 100644 { WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -@@ -9578,17 +9578,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons +@@ -9578,17 +9629,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons } if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], @@ -1669,7 +2378,7 @@ index 7099bcc9ce2..4493602dfb7 100644 { return ret; } -@@ -9717,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, +@@ -9717,12 +9768,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); ins->declaration.tessellator_domain = tessellator_domain; @@ -1686,7 +2395,7 @@ index 7099bcc9ce2..4493602dfb7 100644 { WARN("%s control point count %u invalid.\n", type, count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -@@ -9744,6 +9745,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, +@@ -9744,6 +9796,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); ins->declaration.tessellator_partitioning = tessellator_partitioning; @@ -1695,7 +2404,7 @@ index 7099bcc9ce2..4493602dfb7 100644 } static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser 
*sm6, -@@ -9760,6 +9763,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * +@@ -9760,6 +9814,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); ins->declaration.tessellator_output_primitive = primitive; @@ -1704,7 +2413,7 @@ index 7099bcc9ce2..4493602dfb7 100644 } static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) -@@ -9951,7 +9956,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa +@@ -9951,7 +10007,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa } sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); @@ -1713,7 +2422,7 @@ index 7099bcc9ce2..4493602dfb7 100644 sm6->p.program->input_control_point_count = operands[1]; return operands[0]; -@@ -10010,9 +10015,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa +@@ -10010,9 +10066,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa } } @@ -1725,7 +2434,7 @@ index 7099bcc9ce2..4493602dfb7 100644 sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); program->output_control_point_count = operands[2]; sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); -@@ -10351,7 +10356,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -10351,7 +10407,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. */ count = max(token_count, 400) - 400; if (!vsir_program_init(program, compile_info, &version, @@ -1734,7 +2443,7 @@ index 7099bcc9ce2..4493602dfb7 100644 return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; -@@ -10378,6 +10383,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -10378,6 +10434,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro *input_signature = dxbc_desc->input_signature; *output_signature = dxbc_desc->output_signature; *patch_constant_signature = dxbc_desc->patch_constant_signature; @@ -1743,7 +2452,7 @@ index 7099bcc9ce2..4493602dfb7 100644 block = &sm6->root_block; diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index e22177e1e30..3795add87c7 100644 +index e22177e1e30..779ffa1e156 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -36,6 +36,16 @@ struct fx_4_binary_type @@ -1772,6 +2481,17 @@ index e22177e1e30..3795add87c7 100644 }; static const uint32_t fx_4_numeric_base_types[] = +@@ -598,8 +610,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + return 0; + } + +- value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; +- value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; ++ value |= (type->e.numeric.dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; ++ value |= (type->e.numeric.dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK; + @@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: @@ -1800,6 +2520,24 @@ index e22177e1e30..3795add87c7 100644 
put_u32(buffer, get_fx_2_type_class(type)); put_u32(buffer, name_offset); put_u32(buffer, semantic_offset); +@@ -1034,13 +1047,13 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- put_u32(buffer, type->dimx); +- put_u32(buffer, type->dimy); ++ put_u32(buffer, type->e.numeric.dimx); ++ put_u32(buffer, type->e.numeric.dimy); + break; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_MATRIX: +- put_u32(buffer, type->dimy); +- put_u32(buffer, type->dimx); ++ put_u32(buffer, type->e.numeric.dimy); ++ put_u32(buffer, type->e.numeric.dimx); + break; + case HLSL_CLASS_STRUCT: + put_u32(buffer, type->e.record.field_count); @@ -1061,7 +1074,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n /* Validated in check_invalid_object_fields(). */ @@ -2098,10 +2836,44 @@ index 0df0e30f399..ab6604bd703 100644 vkd3d_glsl_generator_init(&generator, program, compile_info, descriptor_info, combined_sampler_info, message_context); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 96de18dc886..84da2fcbc9f 100644 +index 96de18dc886..858186a1071 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -287,6 +287,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) +@@ -192,18 +192,20 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) + + unsigned int hlsl_type_minor_size(const struct hlsl_type *type) + { ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) +- return type->dimx; ++ return type->e.numeric.dimx; + else +- return type->dimy; ++ return type->e.numeric.dimy; + } + + unsigned int hlsl_type_major_size(const struct hlsl_type *type) + { ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) +- return type->dimy; ++ return type->e.numeric.dimy; + else +- return type->dimx; ++ return type->e.numeric.dimx; + } + + unsigned int hlsl_type_element_count(const struct hlsl_type *type) +@@ -211,7 +213,7 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- return type->dimx; ++ return type->e.numeric.dimx; + case HLSL_CLASS_MATRIX: + return hlsl_type_major_size(type); + case HLSL_CLASS_ARRAY: +@@ -287,6 +289,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) case HLSL_CLASS_UAV: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_BLEND_STATE: @@ -2109,7 +2881,52 @@ index 96de18dc886..84da2fcbc9f 100644 case HLSL_CLASS_VOID: case HLSL_CLASS_NULL: return false; -@@ -434,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type +@@ -354,14 +357,24 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->dimx : 4; ++ type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->e.numeric.dimx : 4; + break; + + case HLSL_CLASS_MATRIX: + if (hlsl_type_is_row_major(type)) +- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy); ++ { ++ if (is_sm4) ++ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimy - 1) + type->e.numeric.dimx; ++ else ++ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimy; ++ } + else +- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? 
(4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx); ++ { ++ if (is_sm4) ++ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimx - 1) + type->e.numeric.dimy; ++ else ++ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimx; ++ } + break; + + case HLSL_CLASS_ARRAY: +@@ -386,7 +399,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + { + unsigned int i; + +- type->dimx = 0; + for (i = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = &type->e.record.fields[i]; +@@ -398,8 +410,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + field->reg_offset[k] = type->reg_size[k]; + type->reg_size[k] += field->type->reg_size[k]; + } +- +- type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); + } + break; + } +@@ -434,6 +444,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: @@ -2117,7 +2934,18 @@ index 96de18dc886..84da2fcbc9f 100644 case HLSL_CLASS_NULL: break; } -@@ -525,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) +@@ -481,8 +492,8 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e + } + type->class = type_class; + type->e.numeric.type = base_type; +- type->dimx = dimx; +- type->dimy = dimy; ++ type->e.numeric.dimx = dimx; ++ type->e.numeric.dimy = dimy; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); +@@ -525,6 +536,7 @@ static bool type_is_single_component(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: @@ -2125,7 +2953,31 @@ index 96de18dc886..84da2fcbc9f 100644 break; } vkd3d_unreachable(); -@@ -680,6 +683,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty +@@ -549,18 +561,19 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- VKD3D_ASSERT(index < type->dimx); ++ VKD3D_ASSERT(index < type->e.numeric.dimx); + *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); + *index_ptr = 0; + return index; + + case HLSL_CLASS_MATRIX: + { +- unsigned int y = index / type->dimx, x = index % type->dimx; ++ unsigned int y = index / type->e.numeric.dimx, x = index % type->e.numeric.dimx; + bool row_major = hlsl_type_is_row_major(type); + +- VKD3D_ASSERT(index < type->dimx * type->dimy); +- *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); ++ VKD3D_ASSERT(index < type->e.numeric.dimx * type->e.numeric.dimy); ++ *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, ++ row_major ? type->e.numeric.dimx : type->e.numeric.dimy); + *index_ptr = row_major ? x : y; + return row_major ? 
y : x; + } +@@ -680,6 +693,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty case HLSL_CLASS_SCALAR: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: @@ -2133,7 +2985,28 @@ index 96de18dc886..84da2fcbc9f 100644 vkd3d_unreachable(); } type = next_type; -@@ -898,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba +@@ -857,9 +871,9 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co + + case HLSL_CLASS_MATRIX: + if (hlsl_type_is_row_major(type)) +- return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); ++ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx); + else +- return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); ++ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimy); + + case HLSL_CLASS_ARRAY: + return type->e.array.type; +@@ -888,8 +902,6 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba + type->modifiers = basic_type->modifiers; + type->e.array.elements_count = array_size; + type->e.array.type = basic_type; +- type->dimx = basic_type->dimx; +- type->dimy = basic_type->dimy; + type->sampler_dim = basic_type->sampler_dim; + hlsl_type_calculate_reg_size(ctx, type); + +@@ -898,6 +910,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba return type; } @@ -2156,7 +3029,50 @@ index 96de18dc886..84da2fcbc9f 100644 struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count) { -@@ -1086,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) +@@ -907,7 +935,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + return NULL; + type->class = HLSL_CLASS_STRUCT; + type->name = name; +- type->dimy = 1; + type->e.record.fields = fields; + type->e.record.field_count = field_count; + hlsl_type_calculate_reg_size(ctx, type); +@@ -925,8 +952,6 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_TEXTURE; +- type->dimx = 4; +- type->dimy = 1; + type->sampler_dim = dim; + type->e.resource.format = format; + type->sample_count = sample_count; +@@ -943,8 +968,6 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_UAV; +- type->dimx = format->dimx; +- type->dimy = 1; + type->sampler_dim = dim; + type->e.resource.format = format; + type->e.resource.rasteriser_ordered = rasteriser_ordered; +@@ -960,7 +983,6 @@ struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *forma + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_CONSTANT_BUFFER; +- type->dimy = 1; + type->e.resource.format = format; + hlsl_type_calculate_reg_size(ctx, type); + list_add_tail(&ctx->types, &type->entry); +@@ -1046,7 +1068,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: +- return type->dimx * type->dimy; ++ return type->e.numeric.dimx * type->e.numeric.dimy; + + case HLSL_CLASS_STRUCT: + { +@@ -1086,6 +1108,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: @@ 
-2164,7 +3080,19 @@ index 96de18dc886..84da2fcbc9f 100644 break; } -@@ -1157,6 +1178,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 +@@ -1110,9 +1133,9 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) + != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) + return false; +- if (t1->dimx != t2->dimx) ++ if (t1->e.numeric.dimx != t2->e.numeric.dimx) + return false; +- if (t1->dimy != t2->dimy) ++ if (t1->e.numeric.dimy != t2->e.numeric.dimy) + return false; + return true; + +@@ -1157,6 +1180,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_CONSTANT_BUFFER: return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); @@ -2176,7 +3104,43 @@ index 96de18dc886..84da2fcbc9f 100644 case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: -@@ -1695,22 +1721,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * +@@ -1198,8 +1226,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + } + } + type->class = old->class; +- type->dimx = old->dimx; +- type->dimy = old->dimy; + type->modifiers = old->modifiers | modifiers; + if (!(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + type->modifiers |= default_majority; +@@ -1212,6 +1238,8 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: ++ type->e.numeric.dimx = old->e.numeric.dimx; ++ type->e.numeric.dimy = old->e.numeric.dimy; + type->e.numeric.type = old->e.numeric.type; + break; + +@@ -1471,7 +1499,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls + hlsl_src_from_node(&store->rhs, rhs); + + if (!writemask && type_is_single_reg(rhs->data_type)) +- writemask = (1 << rhs->data_type->dimx) - 1; ++ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; + store->writemask = writemask; + + return &store->node; +@@ -1498,7 +1526,7 @@ bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + hlsl_src_from_node(&store->rhs, rhs); + + if (type_is_single_reg(rhs->data_type)) +- store->writemask = (1 << rhs->data_type->dimx) - 1; ++ store->writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; + + hlsl_block_add_instr(block, &store->node); + +@@ -1695,22 +1723,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * return &s->node; } @@ -2199,7 +3163,7 @@ index 96de18dc886..84da2fcbc9f 100644 struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { -@@ -1844,22 +1854,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct +@@ -1844,22 +1856,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct return &store->node; } @@ -2250,7 +3214,16 @@ index 96de18dc886..84da2fcbc9f 100644 return &swizzle->node; } -@@ -2054,8 +2087,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type +@@ -2031,7 +2066,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v + if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) + type = type->e.resource.format; + else if (type->class == HLSL_CLASS_MATRIX) +- type = hlsl_get_vector_type(ctx, type->e.numeric.type, 
type->dimx); ++ type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx); + else + type = hlsl_get_element_type_from_path_index(ctx, type, idx); + +@@ -2054,8 +2089,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return &jump->node; } @@ -2261,7 +3234,7 @@ index 96de18dc886..84da2fcbc9f 100644 unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop; -@@ -2066,6 +2099,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +@@ -2066,6 +2101,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); @@ -2272,7 +3245,7 @@ index 96de18dc886..84da2fcbc9f 100644 loop->unroll_type = unroll_type; loop->unroll_limit = unroll_limit; return &loop->node; -@@ -2221,14 +2258,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ +@@ -2221,14 +2260,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { @@ -2296,22 +3269,22 @@ index 96de18dc886..84da2fcbc9f 100644 hlsl_block_cleanup(&body); return NULL; } -@@ -2310,8 +2354,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr +@@ -2310,8 +2356,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { - return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, - map_instr(map, src->val.node), &src->node.loc); + if (src->val.node->data_type->class == HLSL_CLASS_MATRIX) -+ return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->dimx, ++ return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->e.numeric.dimx, + map_instr(map, src->val.node), &src->node.loc); + else -+ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->dimx, ++ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->e.numeric.dimx, + map_instr(map, src->val.node), &src->node.loc); } static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, -@@ -2533,9 +2581,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -2533,9 +2583,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); @@ -2321,7 +3294,7 @@ index 96de18dc886..84da2fcbc9f 100644 } vkd3d_unreachable(); -@@ -2693,10 +2738,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha +@@ -2693,10 +2740,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha return NULL; } @@ -2333,7 +3306,7 @@ index 96de18dc886..84da2fcbc9f 100644 static const char *const base_types[] = { [HLSL_TYPE_FLOAT] = "float", -@@ -2720,31 +2763,28 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2720,31 +2765,29 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", }; @@ -2361,19 +3334,20 @@ index 96de18dc886..84da2fcbc9f 100644 VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; -+ 
vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->dimx); ++ vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->e.numeric.dimx); + return; case HLSL_CLASS_MATRIX: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; -+ vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); ++ vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], ++ type->e.numeric.dimy, type->e.numeric.dimx); + return; case HLSL_CLASS_ARRAY: { -@@ -2753,88 +2793,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2753,88 +2796,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) ; @@ -2502,7 +3476,7 @@ index 96de18dc886..84da2fcbc9f 100644 case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: -@@ -2857,8 +2894,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2857,8 +2897,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru break; } @@ -2522,7 +3496,7 @@ index 96de18dc886..84da2fcbc9f 100644 } struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -@@ -2968,7 +3014,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) +@@ -2968,7 +3017,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", @@ -2530,7 +3504,7 @@ index 96de18dc886..84da2fcbc9f 100644 }; if (type >= ARRAY_SIZE(names)) -@@ -3022,7 +3067,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer +@@ -3022,7 +3070,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); hlsl_release_string_buffer(ctx, string); } @@ -2540,7 +3514,7 @@ index 96de18dc886..84da2fcbc9f 100644 if (var->semantic.name) vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); } -@@ -3103,34 +3149,28 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) +@@ -3103,42 +3152,36 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) return vkd3d_dbg_sprintf(".%s", string); } @@ -2587,6 +3561,30 @@ index 96de18dc886..84da2fcbc9f 100644 static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant) { struct hlsl_type *type = constant->node.data_type; + unsigned int x; + +- if (type->dimx != 1) ++ if (type->e.numeric.dimx != 1) + vkd3d_string_buffer_printf(buffer, "{"); +- for (x = 0; x < type->dimx; ++x) ++ for (x = 0; x < type->e.numeric.dimx; ++x) + { + const union hlsl_constant_value_component *value = &constant->value.u[x]; + +@@ -3164,12 +3207,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl + case HLSL_TYPE_UINT: + vkd3d_string_buffer_printf(buffer, "%u ", value->u); + break; +- +- default: +- vkd3d_unreachable(); + } + } +- if (type->dimx != 1) ++ if (type->e.numeric.dimx != 1) + vkd3d_string_buffer_printf(buffer, "}"); + } + @@ -3201,13 +3241,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) 
[HLSL_OP1_LOG2] = "log2", [HLSL_OP1_LOGIC_NOT] = "!", @@ -2609,22 +3607,29 @@ index 96de18dc886..84da2fcbc9f 100644 [HLSL_OP2_DIV] = "/", [HLSL_OP2_DOT] = "dot", [HLSL_OP2_EQUAL] = "==", -@@ -3402,11 +3439,12 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls +@@ -3398,15 +3435,17 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls + unsigned int i; + + dump_src(buffer, &swizzle->val); +- if (swizzle->val.node->data_type->dimy > 1) ++ if (swizzle->val.node->data_type->e.numeric.dimy > 1) { vkd3d_string_buffer_printf(buffer, "."); - for (i = 0; i < swizzle->node.data_type->dimx; ++i) +- for (i = 0; i < swizzle->node.data_type->dimx; ++i) - vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); ++ for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) + vkd3d_string_buffer_printf(buffer, "_m%u%u", + swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x); } else { - vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx)); -+ vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->dimx)); ++ vkd3d_string_buffer_printf(buffer, "%s", ++ debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->e.numeric.dimx)); } } -@@ -3562,11 +3600,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, +@@ -3562,11 +3601,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; @@ -2636,7 +3641,25 @@ index 96de18dc886..84da2fcbc9f 100644 } } -@@ -3719,6 +3752,7 @@ static void free_ir_load(struct hlsl_ir_load *load) +@@ -3625,10 +3659,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) + + void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) + { ++ const struct hlsl_type *old_type = old->data_type, *new_type = new->data_type; + struct hlsl_src *src, *next; + +- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx); +- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy); ++ if (hlsl_is_numeric_type(old_type)) ++ { ++ VKD3D_ASSERT(hlsl_is_numeric_type(new_type)); ++ VKD3D_ASSERT(old_type->e.numeric.dimx == new_type->e.numeric.dimx); ++ VKD3D_ASSERT(old_type->e.numeric.dimy == new_type->e.numeric.dimy); ++ } + + LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) + { +@@ -3719,6 +3758,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct hlsl_ir_loop *loop) { hlsl_block_cleanup(&loop->body); @@ -2644,7 +3667,7 @@ index 96de18dc886..84da2fcbc9f 100644 vkd3d_free(loop); } -@@ -3875,10 +3909,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) +@@ -3875,10 +3915,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; @@ -2655,7 +3678,7 @@ index 96de18dc886..84da2fcbc9f 100644 } } -@@ -3977,8 +4007,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function +@@ -3977,8 +4013,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) { @@ -2665,7 +3688,7 @@ index 96de18dc886..84da2fcbc9f 100644 /* 
Leave replicate swizzles alone; some instructions need them. */ if (swizzle == HLSL_SWIZZLE(X, X, X, X) -@@ -3987,13 +4017,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) +@@ -3987,13 +4023,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) || swizzle == HLSL_SWIZZLE(W, W, W, W)) return swizzle; @@ -2682,7 +3705,7 @@ index 96de18dc886..84da2fcbc9f 100644 } return ret; } -@@ -4046,7 +4073,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim +@@ -4046,7 +4079,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim for (i = 0; i < dim; ++i) { unsigned int s = hlsl_swizzle_get_component(second, i); @@ -2691,8 +3714,17 @@ index 96de18dc886..84da2fcbc9f 100644 } return ret; } +@@ -4304,7 +4337,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + } + + ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); +- ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); ++ ctx->builtin_types.null = hlsl_new_simple_type(ctx, "NULL", HLSL_CLASS_NULL); + ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); + ctx->builtin_types.error = hlsl_new_simple_type(ctx, "", HLSL_CLASS_ERROR); + hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 075c76cb0e2..7c9547a1c01 100644 +index 075c76cb0e2..d712a325322 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ @@ -2762,7 +3794,35 @@ index 075c76cb0e2..7c9547a1c01 100644 enum hlsl_regset { HLSL_REGSET_SAMPLERS, -@@ -220,6 +213,12 @@ struct hlsl_type +@@ -176,16 +169,6 @@ struct hlsl_type + * Modifiers that don't fall inside this mask are to be stored in the variable in + * hlsl_ir_var.modifiers, or in the struct field in hlsl_ir_field.modifiers. */ + uint32_t modifiers; +- /* Size of the type values on each dimension. For non-numeric types, they are set for the +- * convenience of the sm1/sm4 backends. +- * If type is HLSL_CLASS_SCALAR, then both dimx = 1 and dimy = 1. +- * If type is HLSL_CLASS_VECTOR, then dimx is the size of the vector, and dimy = 1. +- * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. +- * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. +- * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. +- */ +- unsigned int dimx; +- unsigned int dimy; + /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ + unsigned int sample_count; + +@@ -195,6 +178,10 @@ struct hlsl_type + struct + { + enum hlsl_base_type type; ++ /* For scalars, dimx == dimy == 1. ++ * For vectors, dimx == vector width; dimy == 1. ++ * For matrices, dimx == column count; dimy == row count. */ ++ unsigned int dimx, dimy; + } numeric; + /* Additional information if type is HLSL_CLASS_STRUCT. */ + struct +@@ -220,6 +207,12 @@ struct hlsl_type } resource; /* Additional field to distinguish object types. Currently used only for technique types. */ unsigned int version; @@ -2775,7 +3835,7 @@ index 075c76cb0e2..7c9547a1c01 100644 } e; /* Number of numeric register components used by one value of this type, for each regset. 
-@@ -330,8 +329,6 @@ enum hlsl_ir_node_type +@@ -330,8 +323,6 @@ enum hlsl_ir_node_type HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, @@ -2784,7 +3844,7 @@ index 075c76cb0e2..7c9547a1c01 100644 }; /* Common data for every type of IR instruction node. */ -@@ -524,6 +521,10 @@ struct hlsl_ir_var +@@ -524,6 +515,10 @@ struct hlsl_ir_var * element of a struct, and thus needs to be aligned when packed in the signature. */ bool force_align; @@ -2795,7 +3855,7 @@ index 075c76cb0e2..7c9547a1c01 100644 uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; -@@ -644,21 +645,30 @@ struct hlsl_ir_if +@@ -644,21 +639,30 @@ struct hlsl_ir_if struct hlsl_block else_block; }; @@ -2831,7 +3891,7 @@ index 075c76cb0e2..7c9547a1c01 100644 }; struct hlsl_ir_switch_case -@@ -703,13 +713,11 @@ enum hlsl_ir_expr_op +@@ -703,13 +707,11 @@ enum hlsl_ir_expr_op HLSL_OP1_LOG2, HLSL_OP1_LOGIC_NOT, HLSL_OP1_NEG, @@ -2845,7 +3905,7 @@ index 075c76cb0e2..7c9547a1c01 100644 HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ HLSL_OP1_SQRT, -@@ -719,7 +727,6 @@ enum hlsl_ir_expr_op +@@ -719,7 +721,6 @@ enum hlsl_ir_expr_op HLSL_OP2_BIT_AND, HLSL_OP2_BIT_OR, HLSL_OP2_BIT_XOR, @@ -2853,7 +3913,7 @@ index 075c76cb0e2..7c9547a1c01 100644 HLSL_OP2_DIV, HLSL_OP2_DOT, HLSL_OP2_EQUAL, -@@ -781,7 +788,17 @@ struct hlsl_ir_swizzle +@@ -781,7 +782,17 @@ struct hlsl_ir_swizzle { struct hlsl_ir_node node; struct hlsl_src val; @@ -2872,7 +3932,18 @@ index 075c76cb0e2..7c9547a1c01 100644 }; struct hlsl_ir_index -@@ -934,16 +951,6 @@ struct hlsl_ir_stateblock_constant +@@ -844,6 +855,10 @@ enum hlsl_resource_load_type + HLSL_RESOURCE_GATHER_GREEN, + HLSL_RESOURCE_GATHER_BLUE, + HLSL_RESOURCE_GATHER_ALPHA, ++ HLSL_RESOURCE_GATHER_CMP_RED, ++ HLSL_RESOURCE_GATHER_CMP_GREEN, ++ HLSL_RESOURCE_GATHER_CMP_BLUE, ++ HLSL_RESOURCE_GATHER_CMP_ALPHA, + HLSL_RESOURCE_SAMPLE_INFO, + HLSL_RESOURCE_RESINFO, + }; +@@ -934,16 +949,6 @@ struct hlsl_ir_stateblock_constant char *name; }; @@ -2889,7 +3960,7 @@ index 075c76cb0e2..7c9547a1c01 100644 struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. 
*/ -@@ -1259,12 +1266,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co +@@ -1259,12 +1264,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); } @@ -2902,7 +3973,7 @@ index 075c76cb0e2..7c9547a1c01 100644 static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); -@@ -1442,6 +1443,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); +@@ -1442,6 +1441,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); @@ -2911,7 +3982,7 @@ index 075c76cb0e2..7c9547a1c01 100644 void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, -@@ -1519,6 +1522,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond +@@ -1519,6 +1520,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); @@ -2920,7 +3991,7 @@ index 075c76cb0e2..7c9547a1c01 100644 struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); -@@ -1550,8 +1555,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty +@@ -1550,8 +1553,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); @@ -2934,7 +4005,7 @@ index 075c76cb0e2..7c9547a1c01 100644 struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -@@ -1588,9 +1596,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned +@@ -1588,9 +1594,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct vkd3d_shader_location *loc); @@ -2944,7 +4015,7 @@ index 075c76cb0e2..7c9547a1c01 100644 void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, enum vkd3d_shader_error error, const char *fmt, ...) 
VKD3D_PRINTF_FUNC(4, 5); void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -@@ -1645,21 +1650,39 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere +@@ -1645,24 +1648,35 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); @@ -2956,42 +4027,40 @@ index 075c76cb0e2..7c9547a1c01 100644 D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); -D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler); - --void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); --int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); ++ +struct extern_resource +{ + /* "var" is only not NULL if this resource is a whole variable, so it may + * be responsible for more than one component. */ + const struct hlsl_ir_var *var; + const struct hlsl_buffer *buffer; -+ + +-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); +-int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + char *name; + bool is_user_packed; -+ + +-int tpf_compile(struct vsir_program *program, uint64_t config_flags, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, +- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + /* The data type of a single component of the resource. This might be + * different from the data type of the resource itself in 4.0 profiles, + * where an array (or multi-dimensional array) is handled as a single + * resource, unlike in 5.0. 
*/ + struct hlsl_type *component_type; -+ + +-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, +- unsigned int storage_modifiers); + enum hlsl_regset regset; + unsigned int id, space, index, bind_count; + + struct vkd3d_shader_location loc; +}; --int tpf_compile(struct vsir_program *program, uint64_t config_flags, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -+struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count); -+void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count); -+void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef); + struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, - unsigned int storage_modifiers); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 8dace11916a..31fb30521e9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -3021,10 +4090,124 @@ index 8dace11916a..31fb30521e9 100644 typedef {return KW_TYPEDEF; } unsigned {return KW_UNSIGNED; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 60aade732db..a3814a810b5 100644 +index 60aade732db..e6eaac78994 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -475,7 +475,11 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo +@@ -247,18 +247,19 @@ static bool type_contains_only_numerics(const struct hlsl_type *type) + + static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) + { +- if (hlsl_is_numeric_type(src) && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) ++ if (hlsl_is_numeric_type(src) && src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1 ++ && type_contains_only_numerics(dst)) + return true; + + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX +- && src->dimx >= dst->dimx && src->dimy >= dst->dimy) ++ && src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy) + return true; + +- if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) ++ if ((src->class == HLSL_CLASS_MATRIX && src->e.numeric.dimx > 1 && src->e.numeric.dimy > 1) + && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) + return false; + +- if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) ++ if ((dst->class == HLSL_CLASS_MATRIX && dst->e.numeric.dimy > 1) + && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) + return false; + +@@ -273,16 +274,16 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + if (hlsl_is_numeric_type(src)) + { + /* Scalar vars can be converted to any other numeric data type */ +- if (src->dimx == 1 && src->dimy == 1) ++ if (src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1) + return true; + /* The other way around is true too */ +- if (dst->dimx == 1 && dst->dimy == 1) ++ if (dst->e.numeric.dimx == 1 && dst->e.numeric.dimy == 1) + return true; + + if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) + { + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) +- return src->dimx >= dst->dimx && src->dimy >= dst->dimy; ++ 
return src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy; + + /* Matrix-vector conversion is apparently allowed if they have + * the same components count, or if the matrix is 1xN or Nx1 +@@ -292,8 +293,8 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) + return true; + +- if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && +- (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) ++ if ((src->class == HLSL_CLASS_VECTOR || src->e.numeric.dimx == 1 || src->e.numeric.dimy == 1) ++ && (dst->class == HLSL_CLASS_VECTOR || dst->e.numeric.dimx == 1 || dst->e.numeric.dimy == 1)) + return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); + } + +@@ -301,7 +302,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + } + else + { +- return src->dimx >= dst->dimx; ++ return src->e.numeric.dimx >= dst->e.numeric.dimx; + } + } + +@@ -335,7 +336,7 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node + if (type->class == HLSL_CLASS_ERROR) + return; + +- if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1) ++ if (type->class > HLSL_CLASS_LAST_NUMERIC || type->e.numeric.dimx > 1 || type->e.numeric.dimy > 1) + { + struct vkd3d_string_buffer *string; + +@@ -368,14 +369,14 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl + struct hlsl_ir_var *var; + unsigned int dst_idx; + +- broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; ++ broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1; + matrix_cast = !broadcast && dst_comp_count != src_comp_count + && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; + VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); + if (matrix_cast) + { +- VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); +- VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); ++ VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx); ++ VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy); + } + + if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) +@@ -395,9 +396,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl + } + else if (matrix_cast) + { +- unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx; ++ unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx; + +- src_idx = y * src_type->dimx + x; ++ src_idx = y * src_type->e.numeric.dimx + x; + } + else + { +@@ -458,7 +459,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + return NULL; + } + +- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) ++ if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) ++ && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy ++ && ctx->warn_implicit_truncation) + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", + src_type->class == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); + +@@ -475,7 +478,11 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo for (i = 0; i < arrays->count; ++i) { if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) @@ -3036,7 +4219,7 @@ index 60aade732db..a3814a810b5 100644 dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); } -@@ -551,13 +555,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co +@@ -551,13 +558,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co return true; } @@ -3050,7 +4233,7 @@ index 60aade732db..a3814a810b5 100644 static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) { unsigned int i, j; -@@ -573,8 +570,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru +@@ -573,8 +573,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru } } @@ -3061,7 +4244,7 @@ index 60aade732db..a3814a810b5 100644 { struct hlsl_ir_node *instr, *next; -@@ -584,8 +581,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block +@@ -584,8 +584,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -3072,7 +4255,7 @@ index 60aade732db..a3814a810b5 100644 } else if (instr->type == HLSL_IR_JUMP) { -@@ -595,7 +592,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block +@@ -595,7 +595,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) continue; @@ -3081,7 +4264,7 @@ index 60aade732db..a3814a810b5 100644 { if (!hlsl_clone_block(ctx, &cond_block, cond)) return; -@@ -606,13 +603,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block +@@ -606,13 +606,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block } list_move_before(&instr->entry, &cond_block.instrs); } @@ -3095,7 +4278,7 @@ index 60aade732db..a3814a810b5 100644 } } } -@@ -678,8 +668,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx +@@ -678,8 +671,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; @@ -3104,7 +4287,7 @@ index 60aade732db..a3814a810b5 100644 } } -@@ -738,11 +726,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str +@@ -738,11 +729,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str return res.number.u; } @@ -3118,7 +4301,7 @@ index 60aade732db..a3814a810b5 100644 unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; -@@ -773,11 +761,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -773,11 +764,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, hlsl_block_cleanup(&expr); } @@ -3132,7 +4315,7 @@ index 60aade732db..a3814a810b5 100644 } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) -@@ -790,7 +778,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -790,7 +781,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, } } @@ -3141,7 +4324,7 @@ index 60aade732db..a3814a810b5 100644 if (!init && !(init = 
make_empty_block(ctx))) goto oom; -@@ -798,15 +786,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -798,15 +789,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, if (!append_conditional_break(ctx, cond)) goto oom; @@ -3159,7 +4342,7 @@ index 60aade732db..a3814a810b5 100644 goto oom; hlsl_block_add_instr(init, loop); -@@ -860,6 +845,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -860,6 +848,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ @@ -3167,9 +4350,12 @@ index 60aade732db..a3814a810b5 100644 bool m_swizzle; unsigned int inc, x, y; -@@ -890,10 +876,11 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -888,12 +877,13 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod + x = swizzle[i + 2] - '1'; + } - if (x >= value->data_type->dimx || y >= value->data_type->dimy) +- if (x >= value->data_type->dimx || y >= value->data_type->dimy) ++ if (x >= value->data_type->e.numeric.dimx || y >= value->data_type->e.numeric.dimy) return NULL; - swiz |= (y << 4 | x) << component * 8; + s.components[component].x = x; @@ -3181,9 +4367,12 @@ index 60aade732db..a3814a810b5 100644 } /* Vector swizzle */ -@@ -922,8 +909,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod +@@ -920,10 +910,9 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod + break; + } - if (s >= value->data_type->dimx) +- if (s >= value->data_type->dimx) ++ if (s >= value->data_type->e.numeric.dimx) return NULL; - swiz |= s << component * 2; - component++; @@ -3191,7 +4380,16 @@ index 60aade732db..a3814a810b5 100644 } if (valid) return hlsl_new_swizzle(ctx, swiz, component, value, loc); -@@ -1192,6 +1178,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -1035,7 +1024,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str + { + unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); + +- if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) ++ if (index_type->class > HLSL_CLASS_VECTOR || index_type->e.numeric.dimx != dim_count) + { + struct vkd3d_string_buffer *string; + +@@ -1192,6 +1181,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in struct fields."); @@ -3200,7 +4398,7 @@ index 60aade732db..a3814a810b5 100644 } field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); -@@ -1282,6 +1270,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, +@@ -1282,6 +1273,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in typedefs."); @@ -3213,7 +4411,118 @@ index 60aade732db..a3814a810b5 100644 } if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) -@@ -2092,8 +2086,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned +@@ -1580,7 +1577,7 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * + static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) + { + /* 
Scalar vars can be converted to pretty much everything */ +- if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) ++ if ((t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) || (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1)) + return true; + + if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) +@@ -1595,13 +1592,13 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t + if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) + return true; + +- return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) +- || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); ++ return (t1->class == HLSL_CLASS_MATRIX && (t1->e.numeric.dimx == 1 || t1->e.numeric.dimy == 1)) ++ || (t2->class == HLSL_CLASS_MATRIX && (t2->e.numeric.dimx == 1 || t2->e.numeric.dimy == 1)); + } + + /* Both matrices */ +- if ((t1->dimx >= t2->dimx && t1->dimy >= t2->dimy) +- || (t1->dimx <= t2->dimx && t1->dimy <= t2->dimy)) ++ if ((t1->e.numeric.dimx >= t2->e.numeric.dimx && t1->e.numeric.dimy >= t2->e.numeric.dimy) ++ || (t1->e.numeric.dimx <= t2->e.numeric.dimx && t1->e.numeric.dimy <= t2->e.numeric.dimy)) + return true; + } + +@@ -1661,37 +1658,37 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct + return false; + } + +- if (t1->dimx == 1 && t1->dimy == 1) ++ if (t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) + { + *type = t2->class; +- *dimx = t2->dimx; +- *dimy = t2->dimy; ++ *dimx = t2->e.numeric.dimx; ++ *dimy = t2->e.numeric.dimy; + } +- else if (t2->dimx == 1 && t2->dimy == 1) ++ else if (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1) + { + *type = t1->class; +- *dimx = t1->dimx; +- *dimy = t1->dimy; ++ *dimx = t1->e.numeric.dimx; ++ *dimy = t1->e.numeric.dimy; + } + else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) + { + *type = HLSL_CLASS_MATRIX; +- *dimx = min(t1->dimx, t2->dimx); +- *dimy = min(t1->dimy, t2->dimy); ++ *dimx = min(t1->e.numeric.dimx, t2->e.numeric.dimx); ++ *dimy = min(t1->e.numeric.dimy, t2->e.numeric.dimy); + } + else + { +- if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) ++ if (t1->e.numeric.dimx * t1->e.numeric.dimy <= t2->e.numeric.dimx * t2->e.numeric.dimy) + { + *type = t1->class; +- *dimx = t1->dimx; +- *dimy = t1->dimy; ++ *dimx = t1->e.numeric.dimx; ++ *dimy = t1->e.numeric.dimy; + } + else + { + *type = t2->class; +- *dimx = t2->dimx; +- *dimy = t2->dimy; ++ *dimx = t2->e.numeric.dimx; ++ *dimy = t2->e.numeric.dimy; + } + } + +@@ -1719,7 +1716,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl + return NULL; + hlsl_init_simple_deref_from_var(&var_deref, var); + +- for (i = 0; i < type->dimy * type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimy * type->e.numeric.dimx; ++i) + { + struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; + struct hlsl_block store_block; +@@ -1822,7 +1819,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + return arg; + + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, +- arg->data_type->dimx, arg->data_type->dimy); ++ arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); + + if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) + return NULL; +@@ -1985,11 +1982,11 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls + } + + if (arg1->data_type->class == HLSL_CLASS_SCALAR) +- dim = 
arg2->data_type->dimx; ++ dim = arg2->data_type->e.numeric.dimx; + else if (arg2->data_type->class == HLSL_CLASS_SCALAR) +- dim = arg1->data_type->dimx; ++ dim = arg1->data_type->e.numeric.dimx; + else +- dim = min(arg1->data_type->dimx, arg2->data_type->dimx); ++ dim = min(arg1->data_type->e.numeric.dimx, arg2->data_type->e.numeric.dimx); + + if (dim == 1) + op = HLSL_OP2_MUL; +@@ -2092,8 +2089,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { if (*writemask & (1 << i)) { @@ -3224,7 +4533,7 @@ index 60aade732db..a3814a810b5 100644 if (new_writemask & (1 << s)) return false; new_writemask |= 1 << s; -@@ -2107,9 +2101,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned +@@ -2107,9 +2104,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { for (j = 0; j < width; ++j) { @@ -3236,7 +4545,7 @@ index 60aade732db..a3814a810b5 100644 } } -@@ -2119,22 +2113,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned +@@ -2119,22 +2116,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; } @@ -3267,7 +4576,7 @@ index 60aade732db..a3814a810b5 100644 if (new_writemask & (1 << idx)) return false; new_writemask |= 1 << idx; -@@ -2142,22 +2136,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un +@@ -2142,22 +2139,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un } width = bit; @@ -3296,7 +4605,18 @@ index 60aade732db..a3814a810b5 100644 *writemask = new_writemask; *ret_width = width; return true; -@@ -2211,28 +2205,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc +@@ -2193,8 +2190,8 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + + if (hlsl_is_numeric_type(lhs_type)) + { +- writemask = (1 << lhs_type->dimx) - 1; +- width = lhs_type->dimx; ++ writemask = (1 << lhs_type->e.numeric.dimx) - 1; ++ width = lhs_type->e.numeric.dimx; + } + + if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) +@@ -2211,28 +2208,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; @@ -3336,7 +4656,50 @@ index 60aade732db..a3814a810b5 100644 } if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) -@@ -2670,26 +2670,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +@@ -2275,13 +2278,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + + dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + +- if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) ++ if (width != resource_type->e.resource.format->e.numeric.dimx * resource_type->e.resource.format->e.numeric.dimy) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Resource store expressions must write to all components."); + + VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); +- VKD3D_ASSERT(coords->data_type->dimx == dim_count); ++ VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); + + if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) + { +@@ -2298,14 +2301,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block 
*block, struc + + hlsl_init_deref_from_index_chain(ctx, &deref, lhs); + +- for (i = 0; i < lhs->data_type->dimy; ++i) ++ for (i = 0; i < lhs->data_type->e.numeric.dimy; ++i) + { +- for (j = 0; j < lhs->data_type->dimx; ++j) ++ for (j = 0; j < lhs->data_type->e.numeric.dimx; ++j) + { + struct hlsl_ir_node *load; + struct hlsl_block store_block; + const unsigned int idx = i * 4 + j; +- const unsigned int component = i * lhs->data_type->dimx + j; ++ const unsigned int component = i * lhs->data_type->e.numeric.dimx + j; + + if (!(writemask & (1 << idx))) + continue; +@@ -2335,7 +2338,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + + VKD3D_ASSERT(!matrix_writemask); + +- for (i = 0; i < mat->data_type->dimx; ++i) ++ for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) + { + struct hlsl_ir_node *cell, *load, *store, *c; + struct hlsl_deref deref; +@@ -2670,26 +2673,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Only innermost array size can be implicit."); @@ -3371,7 +4734,7 @@ index 60aade732db..a3814a810b5 100644 } else { -@@ -2908,7 +2912,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2908,7 +2915,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->initializer.args[0] = node_from_block(v->initializer.instrs); } @@ -3381,7 +4744,7 @@ index 60aade732db..a3814a810b5 100644 if (is_default_values_initializer) { -@@ -2993,13 +2998,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ +@@ -2993,13 +3001,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ return true; } @@ -3520,7 +4883,7 @@ index 60aade732db..a3814a810b5 100644 if (!(entry = rb_get(&ctx->functions, name))) return NULL; -@@ -3007,18 +3136,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, +@@ -3007,18 +3139,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { @@ -3545,8 +4908,9 @@ index 60aade732db..a3814a810b5 100644 + candidates.candidates[0] = decl; + candidates.count = 1; + continue; -+ } -+ } + } +- compatible_match = decl; + } + + if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates, + &candidates.capacity, candidates.count + 1, sizeof(decl)))) @@ -3555,8 +4919,9 @@ index 60aade732db..a3814a810b5 100644 + return NULL; + } + candidates.candidates[candidates.count++] = decl; -+ } -+ + } + +- return compatible_match; + if (!candidates.count) + return NULL; + @@ -3571,13 +4936,11 @@ index 60aade732db..a3814a810b5 100644 + hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]); + hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer); + vkd3d_string_buffer_clear(s); - } -- compatible_match = decl; ++ } + hlsl_release_string_buffer(ctx, s); - } - } - -- return compatible_match; ++ } ++ } ++ + decl = candidates.candidates[0]; + vkd3d_free(candidates.candidates); + @@ -3585,7 +4948,199 @@ index 60aade732db..a3814a810b5 100644 } static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -@@ -5447,6 +5616,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3164,7 +3336,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, + if (!type_is_integer(type->e.numeric.type)) 
+ return arg; + +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); + return add_implicit_conversion(ctx, params->instrs, arg, type, loc); + } + +@@ -3203,13 +3375,13 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * + if (arg_type->class == HLSL_CLASS_VECTOR) + { + vectors = true; +- dimx = min(dimx, arg_type->dimx); ++ dimx = min(dimx, arg_type->e.numeric.dimx); + } + else if (arg_type->class == HLSL_CLASS_MATRIX) + { + matrices = true; +- dimx = min(dimx, arg_type->dimx); +- dimy = min(dimy, arg_type->dimy); ++ dimx = min(dimx, arg_type->e.numeric.dimx); ++ dimy = min(dimy, arg_type->e.numeric.dimy); + } + } + +@@ -3254,7 +3426,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + if (type_is_integer(type->e.numeric.type)) +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); + + return convert_args(ctx, params, type, loc); + } +@@ -3267,7 +3439,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + + return convert_args(ctx, params, type, loc); + } +@@ -3334,7 +3506,7 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, + static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) + { +- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); ++ return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy); + } + + static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, +@@ -3855,7 +4027,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + +- dim = min(type->dimx, type->dimy); ++ dim = min(type->e.numeric.dimx, type->e.numeric.dimy); + if (dim == 1) + return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); + +@@ -3939,7 +4111,7 @@ static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + type = params->args[0]->data_type; + if (!(type->class == HLSL_CLASS_SCALAR +- || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4))) ++ || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 4))) + { + struct vkd3d_string_buffer *string; + if ((string = hlsl_type_to_string(ctx, type))) +@@ -4371,15 +4543,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + if (arg1->data_type->class == HLSL_CLASS_VECTOR) + { + vect_count++; +- cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); ++ cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->e.numeric.dimx, 1); + } + if (arg2->data_type->class == HLSL_CLASS_VECTOR) + { + vect_count++; +- cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); ++ cast_type2 = hlsl_get_matrix_type(ctx, base, 1, 
arg2->data_type->e.numeric.dimx); + } + +- matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->dimx, cast_type1->dimy); ++ matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->e.numeric.dimx, cast_type1->e.numeric.dimy); + + if (vect_count == 0) + { +@@ -4387,12 +4559,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + } + else if (vect_count == 1) + { +- VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); +- ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); ++ VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 || matrix_type->e.numeric.dimy == 1); ++ ret_type = hlsl_get_vector_type(ctx, base, matrix_type->e.numeric.dimx * matrix_type->e.numeric.dimy); + } + else + { +- VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); ++ VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 && matrix_type->e.numeric.dimy == 1); + ret_type = hlsl_get_scalar_type(ctx, base); + } + +@@ -4406,23 +4578,23 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + +- for (i = 0; i < matrix_type->dimx; ++i) ++ for (i = 0; i < matrix_type->e.numeric.dimx; ++i) + { +- for (j = 0; j < matrix_type->dimy; ++j) ++ for (j = 0; j < matrix_type->e.numeric.dimy; ++j) + { + struct hlsl_ir_node *instr = NULL; + struct hlsl_block block; + +- for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) ++ for (k = 0; k < cast_type1->e.numeric.dimx && k < cast_type2->e.numeric.dimy; ++k) + { + struct hlsl_ir_node *value1, *value2, *mul; + + if (!(value1 = hlsl_add_load_component(ctx, params->instrs, +- cast1, j * cast1->data_type->dimx + k, loc))) ++ cast1, j * cast1->data_type->e.numeric.dimx + k, loc))) + return false; + + if (!(value2 = hlsl_add_load_component(ctx, params->instrs, +- cast2, k * cast2->data_type->dimx + i, loc))) ++ cast2, k * cast2->data_type->e.numeric.dimx + i, loc))) + return false; + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) +@@ -4439,7 +4611,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + } + } + +- if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->e.numeric.dimx + i, instr)) + return false; + hlsl_block_add_block(params->instrs, &block); + } +@@ -4632,7 +4804,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, + static const struct hlsl_constant_value zero_value; + + struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, +- arg->data_type->dimx, arg->data_type->dimy); ++ arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); + + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) + return false; +@@ -5086,22 +5258,23 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + return true; + } + +- mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); ++ mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->e.numeric.dimy, arg_type->e.numeric.dimx); + + if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + +- for (i = 0; i < arg_type->dimx; ++i) ++ for (i = 0; i < arg_type->e.numeric.dimx; ++i) + { +- for (j = 0; j < arg_type->dimy; ++j) ++ for (j = 0; j < arg_type->e.numeric.dimy; ++j) + { + struct hlsl_block block; + +- if (!(load = 
hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, ++ j * arg->data_type->e.numeric.dimx + i, loc))) + return false; + +- if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->e.numeric.dimx + j, load)) + return false; + hlsl_block_add_block(params->instrs, &block); + } +@@ -5131,7 +5304,8 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; + struct hlsl_type *arg_type = arg->data_type; + +- if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) ++ if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR ++ && arg_type->e.numeric.dimx == 4)) + { + struct vkd3d_string_buffer *string; + +@@ -5447,6 +5621,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type struct hlsl_ir_load *load; struct hlsl_ir_var *var; @@ -3603,7 +5158,170 @@ index 60aade732db..a3814a810b5 100644 if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL; -@@ -6553,6 +6733,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5483,6 +5668,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + hlsl_error(ctx, &cond->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Ternary condition type '%s' is not numeric.", string->buffer); + hlsl_release_string_buffer(ctx, string); ++ return false; + } + + if (first->data_type->class <= HLSL_CLASS_LAST_NUMERIC +@@ -5491,21 +5677,22 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + if (!(common_type = get_common_numeric_type(ctx, first, second, &first->loc))) + return false; + +- if (cond_type->dimx == 1 && cond_type->dimy == 1) ++ if (cond_type->e.numeric.dimx == 1 && cond_type->e.numeric.dimy == 1) + { + cond_type = hlsl_get_numeric_type(ctx, common_type->class, +- HLSL_TYPE_BOOL, common_type->dimx, common_type->dimy); ++ HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + } + else + { +- if (common_type->dimx == 1 && common_type->dimy == 1) ++ if (common_type->e.numeric.dimx == 1 && common_type->e.numeric.dimy == 1) + { + common_type = hlsl_get_numeric_type(ctx, cond_type->class, +- common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); ++ common_type->e.numeric.type, cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); + } +- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) ++ else if (cond_type->e.numeric.dimx != common_type->e.numeric.dimx ++ || cond_type->e.numeric.dimy != common_type->e.numeric.dimy) + { + /* This condition looks wrong but is correct. + * floatN is compatible with float1xN, but not with floatNx1. 
*/ +@@ -5523,7 +5710,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + } + + cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, +- common_type->dimx, common_type->dimy); ++ common_type->e.numeric.dimx, common_type->e.numeric.dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + } +@@ -5551,7 +5738,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + } + + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, +- cond_type->dimx, cond_type->dimy); ++ cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + +@@ -5923,7 +6110,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return false; + } + +- if (read_channel >= object_type->e.resource.format->dimx) ++ if (read_channel >= object_type->e.resource.format->e.numeric.dimx) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Method %s() requires at least %u channels.", name, read_channel + 1); +@@ -5944,6 +6131,87 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return true; + } + ++static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ struct hlsl_resource_load_params load_params = {0}; ++ unsigned int sampler_dim, offset_dim; ++ const struct hlsl_type *sampler_type; ++ struct hlsl_ir_node *load; ++ ++ sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ ++ if (!strcmp(name, "GatherCmpGreen")) ++ load_params.type = HLSL_RESOURCE_GATHER_CMP_GREEN; ++ else if (!strcmp(name, "GatherCmpBlue")) ++ load_params.type = HLSL_RESOURCE_GATHER_CMP_BLUE; ++ else if (!strcmp(name, "GatherCmpAlpha")) ++ load_params.type = HLSL_RESOURCE_GATHER_CMP_ALPHA; ++ else ++ load_params.type = HLSL_RESOURCE_GATHER_CMP_RED; ++ ++ if (!strcmp(name, "GatherCmp") || !offset_dim) ++ { ++ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", ++ name, 4 + !!offset_dim, params->args_count); ++ return false; ++ } ++ } ++ else if (params->args_count < 3 || params->args_count == 6 || params->args_count > 8) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected 3, 4, 5, 7, or 8, but got %u.", ++ name, params->args_count); ++ return false; ++ } ++ ++ if (params->args_count == 5 || params->args_count == 8) ++ { ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ } ++ else if (offset_dim && params->args_count > 3) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ return false; ++ } ++ ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", ++ name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ return false; ++ ++ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) ++ return false; ++ ++ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, load); ++ return true; ++} ++ + static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, + struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) + { +@@ -6311,6 +6579,11 @@ texture_methods[] = + { "Gather", add_gather_method_call, "00010101001000" }, + { "GatherAlpha", add_gather_method_call, "00010101001000" }, + { "GatherBlue", add_gather_method_call, "00010101001000" }, ++ { "GatherCmp", add_gather_cmp_method_call, "00010101001000" }, ++ { "GatherCmpAlpha", add_gather_cmp_method_call, "00010101001000" }, ++ { "GatherCmpBlue", add_gather_cmp_method_call, "00010101001000" }, ++ { "GatherCmpGreen", add_gather_cmp_method_call, "00010101001000" }, ++ { "GatherCmpRed", add_gather_cmp_method_call, "00010101001000" }, + { "GatherGreen", add_gather_method_call, "00010101001000" }, + { "GatherRed", add_gather_method_call, "00010101001000" }, + +@@ -6553,6 +6826,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_semantic semantic; enum hlsl_buffer_type buffer_type; enum hlsl_sampler_dim sampler_dim; @@ -3611,7 +5329,7 @@ index 60aade732db..a3814a810b5 100644 struct hlsl_attribute *attr; struct parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; -@@ -6596,6 +6777,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6596,6 +6870,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_INLINE %token KW_INOUT %token KW_LINEAR @@ -3619,7 +5337,7 @@ index 60aade732db..a3814a810b5 100644 %token KW_MATRIX %token KW_NAMESPACE %token KW_NOINTERPOLATION -@@ -6605,6 +6787,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6605,6 +6880,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER @@ -3627,7 +5345,7 @@ index 60aade732db..a3814a810b5 100644 %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -6654,6 +6837,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6654,6 +6930,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_TEXTURE3D %token KW_TEXTURECUBE %token KW_TEXTURECUBEARRAY @@ -3635,7 +5353,7 @@ index 60aade732db..a3814a810b5 100644 %token KW_TRUE %token KW_TYPEDEF %token KW_UNSIGNED -@@ -6784,6 +6968,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -6784,6 +7061,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum 
hlsl_sampler_dim dim, %type semantic @@ -3644,7 +5362,7 @@ index 60aade732db..a3814a810b5 100644 %type state_block %type state_block_index_opt -@@ -7684,7 +7870,10 @@ parameter_decl: +@@ -7684,7 +7963,10 @@ parameter_decl: { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in function parameters."); @@ -3655,7 +5373,7 @@ index 60aade732db..a3814a810b5 100644 type = hlsl_new_array_type(ctx, type, $4.sizes[i]); } vkd3d_free($4.sizes); -@@ -7805,6 +7994,20 @@ rov_type: +@@ -7805,6 +8087,20 @@ rov_type: $$ = HLSL_SAMPLER_DIM_3D; } @@ -3676,7 +5394,7 @@ index 60aade732db..a3814a810b5 100644 resource_format: var_modifiers type { -@@ -7948,6 +8151,10 @@ type_no_void: +@@ -7948,6 +8244,10 @@ type_no_void: validate_uav_type(ctx, $1, $3, &@4); $$ = hlsl_new_uav_type(ctx, $1, $3, true); } @@ -3687,7 +5405,7 @@ index 60aade732db..a3814a810b5 100644 | KW_RWBYTEADDRESSBUFFER { $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); -@@ -8088,14 +8295,9 @@ typedef: +@@ -8088,14 +8388,9 @@ typedef: } if (modifiers) @@ -3703,7 +5421,7 @@ index 60aade732db..a3814a810b5 100644 if (!add_typedef(ctx, type, $4)) YYABORT; } -@@ -8753,25 +8955,25 @@ if_body: +@@ -8753,25 +9048,25 @@ if_body: loop_statement: attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement { @@ -3733,7 +5451,7 @@ index 60aade732db..a3814a810b5 100644 hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } -@@ -8979,17 +9181,24 @@ primary_expr: +@@ -8979,17 +9274,24 @@ primary_expr: struct hlsl_ir_load *load; struct hlsl_ir_var *var; @@ -3765,7 +5483,7 @@ index 60aade732db..a3814a810b5 100644 } | '(' expr ')' { -@@ -9149,23 +9358,8 @@ postfix_expr: +@@ -9149,23 +9451,8 @@ postfix_expr: | var_modifiers type '(' initializer_expr_list ')' { if ($1) @@ -3789,7 +5507,7 @@ index 60aade732db..a3814a810b5 100644 if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { -@@ -9233,11 +9427,8 @@ unary_expr: +@@ -9233,11 +9520,8 @@ unary_expr: | '(' var_modifiers type arrays ')' unary_expr { if ($2) @@ -3801,7 +5519,7 @@ index 60aade732db..a3814a810b5 100644 if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) { -@@ -9381,10 +9572,7 @@ assignment_expr: +@@ -9381,10 +9665,7 @@ assignment_expr: struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) @@ -3813,18 +5531,53 @@ index 60aade732db..a3814a810b5 100644 destroy_block($1); if (!add_assignment(ctx, $3, lhs, $2, rhs)) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index d11ff481f6b..a43ea53089e 100644 +index d11ff481f6b..c666599b342 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -19,6 +19,7 @@ +@@ -19,9 +19,14 @@ */ #include "hlsl.h" +#include "vkd3d_shader_private.h" ++#include "d3dcommon.h" #include #include -@@ -1075,7 +1076,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins ++/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ ++#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 ++ + /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ + static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *type, struct hlsl_ir_node *base_offset, struct hlsl_ir_node *idx, +@@ -269,7 +274,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls + if (ctx->profile->major_version < 4) + return true; + +- if (type1->dimx != type2->dimx) ++ if (type1->e.numeric.dimx != type2->e.numeric.dimx) + return false; + + return base_type_get_semantic_equivalent(type1->e.numeric.type) +@@ -291,6 +296,9 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + { + if (!ascii_strcasecmp(ext_var->name, new_name)) + { ++ VKD3D_ASSERT(ext_var->data_type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR); ++ + if (output) + { + if (index >= semantic->reported_duplicated_output_next_index) +@@ -1031,7 +1039,7 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * + static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, + const struct vkd3d_shader_location *loc) + { +- unsigned int dim_count = index->data_type->dimx; ++ unsigned int dim_count = index->data_type->e.numeric.dimx; + struct hlsl_ir_node *store, *zero; + struct hlsl_ir_load *coords_load; + struct hlsl_deref coords_deref; +@@ -1075,7 +1083,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins struct hlsl_deref var_deref; struct hlsl_type *matrix_type; struct hlsl_ir_var *var; @@ -3833,18 +5586,64 @@ index d11ff481f6b..a43ea53089e 100644 if (instr->type != HLSL_IR_SWIZZLE) return false; -@@ -1093,9 +1094,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins +@@ -1088,14 +1096,12 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + +- for (i = 0; i < instr->data_type->dimx; ++i) ++ for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) + { struct hlsl_block store_block; struct hlsl_ir_node *load; - y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; - x = (swizzle->swizzle >> 8 * i) & 0xf; - k = y * matrix_type->dimx + x; -+ k = swizzle->u.matrix.components[i].y * matrix_type->dimx + swizzle->u.matrix.components[i].x; ++ k = swizzle->u.matrix.components[i].y * matrix_type->e.numeric.dimx + swizzle->u.matrix.components[i].x; if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) return false; -@@ -1358,8 +1357,10 @@ struct copy_propagation_var_def +@@ -1140,7 +1146,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + + VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); +- VKD3D_ASSERT(coords->data_type->dimx == dim_count); ++ VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); + + if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) + return false; +@@ -1176,7 +1182,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return false; + hlsl_init_simple_deref_from_var(&row_deref, var); + +- for (i = 0; i < mat->data_type->dimx; ++i) ++ for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) + { + struct hlsl_ir_node *c; + +@@ -1225,7 +1231,7 @@ static bool lower_broadcasts(struct hlsl_ctx 
*ctx, struct hlsl_ir_node *instr, s + src_type = cast->operands[0].node->data_type; + dst_type = cast->node.data_type; + +- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) ++ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1) + { + struct hlsl_ir_node *new_cast, *swizzle; + +@@ -1236,9 +1242,10 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s + return false; + hlsl_block_add_instr(block, new_cast); + +- if (dst_type->dimx != 1) ++ if (dst_type->e.numeric.dimx != 1) + { +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, new_cast, &cast->node.loc))) ++ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), ++ dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) + return false; + hlsl_block_add_instr(block, swizzle); + } +@@ -1358,8 +1365,10 @@ struct copy_propagation_var_def struct copy_propagation_state { @@ -3857,7 +5656,7 @@ index d11ff481f6b..a43ea53089e 100644 }; static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) -@@ -1381,6 +1382,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte +@@ -1381,6 +1390,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte vkd3d_free(var_def); } @@ -3896,7 +5695,7 @@ index d11ff481f6b..a43ea53089e 100644 static struct copy_propagation_value *copy_propagation_get_value_at_time( struct copy_propagation_component_trace *trace, unsigned int time) { -@@ -1398,9 +1431,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( +@@ -1398,9 +1439,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, const struct hlsl_ir_var *var, unsigned int component, unsigned int time) { @@ -3909,7 +5708,7 @@ index d11ff481f6b..a43ea53089e 100644 if (entry) { struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); -@@ -1426,7 +1460,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co +@@ -1426,7 +1468,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { @@ -3919,7 +5718,7 @@ index d11ff481f6b..a43ea53089e 100644 struct copy_propagation_var_def *var_def; unsigned int component_count = hlsl_type_component_count(var->data_type); int res; -@@ -1439,7 +1474,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h +@@ -1439,7 +1482,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h var_def->var = var; @@ -3928,7 +5727,7 @@ index d11ff481f6b..a43ea53089e 100644 VKD3D_ASSERT(!res); return var_def; -@@ -1596,7 +1631,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, +@@ -1596,7 +1639,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count)); return false; } @@ -3937,7 +5736,7 @@ index d11ff481f6b..a43ea53089e 100644 } TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", -@@ -1678,6 +1713,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx 
*ctx, +@@ -1678,6 +1721,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: @@ -3945,7 +5744,7 @@ index d11ff481f6b..a43ea53089e 100644 case HLSL_CLASS_NULL: break; -@@ -1719,10 +1755,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, +@@ -1719,10 +1763,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, return false; load = hlsl_ir_load(swizzle->val.node); @@ -3958,7 +5757,7 @@ index d11ff481f6b..a43ea53089e 100644 return true; return false; -@@ -1818,18 +1854,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s +@@ -1818,18 +1862,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s } } @@ -3977,7 +5776,7 @@ index d11ff481f6b..a43ea53089e 100644 static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_block *block, unsigned int time) { -@@ -1898,16 +1922,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1898,16 +1930,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state *state) { @@ -4004,7 +5803,7 @@ index d11ff481f6b..a43ea53089e 100644 /* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for -@@ -1922,14 +1949,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if +@@ -1922,14 +1957,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, struct copy_propagation_state *state) { @@ -4025,7 +5824,7 @@ index d11ff481f6b..a43ea53089e 100644 return progress; } -@@ -1937,15 +1966,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l +@@ -1937,15 +1974,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, struct copy_propagation_state *state) { @@ -4046,7 +5845,7 @@ index d11ff481f6b..a43ea53089e 100644 } LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -@@ -1964,6 +1994,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1964,6 +2002,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) { @@ -4059,7 +5858,7 @@ index d11ff481f6b..a43ea53089e 100644 switch (instr->type) { case HLSL_IR_LOAD: -@@ -2001,6 +2037,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2001,6 +2045,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b default: break; } @@ -4069,7 +5868,7 @@ index d11ff481f6b..a43ea53089e 100644 } return progress; -@@ -2013,7 +2052,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -2013,7 +2060,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc index_instructions(block, 2); @@ -4078,27 +5877,137 @@ index d11ff481f6b..a43ea53089e 100644 progress = copy_propagation_transform_block(ctx, block, &state); -@@ -2401,8 +2440,8 @@ 
static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -2053,10 +2100,10 @@ static enum validation_result validate_component_index_range_from_deref(struct h + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- if (idx >= type->dimx) ++ if (idx >= type->e.numeric.dimx) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Vector index is out of bounds. %u/%u", idx, type->dimx); ++ "Vector index is out of bounds. %u/%u", idx, type->e.numeric.dimx); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; +@@ -2187,7 +2234,7 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + + static bool is_vec1(const struct hlsl_type *type) + { +- return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); ++ return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); + } + + static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -2364,18 +2411,20 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + src_type = cast->operands[0].node->data_type; + dst_type = cast->node.data_type; + +- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) ++ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR ++ && dst_type->e.numeric.dimx < src_type->e.numeric.dimx) + { + struct hlsl_ir_node *new_cast, *swizzle; + +- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); ++ dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx); + /* We need to preserve the cast since it might be doing more than just + * narrowing the vector. 
*/ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) + return false; + hlsl_block_add_instr(block, new_cast); + +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) ++ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), ++ dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) + return false; + hlsl_block_add_instr(block, swizzle); + +@@ -2401,11 +2450,12 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *new_swizzle; uint32_t combined_swizzle; - combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, - swizzle->swizzle, instr->data_type->dimx); + combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, -+ swizzle->u.vector, instr->data_type->dimx); ++ swizzle->u.vector, instr->data_type->e.numeric.dimx); next_instr = hlsl_ir_swizzle(next_instr)->val.node; - if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) -@@ -2429,7 +2468,7 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i +- if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) ++ if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, ++ instr->data_type->e.numeric.dimx, next_instr, &instr->loc))) + return false; + + list_add_before(&instr->entry, &new_swizzle->entry); +@@ -2425,11 +2475,11 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i + return false; + swizzle = hlsl_ir_swizzle(instr); + +- if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) ++ if (instr->data_type->e.numeric.dimx != swizzle->val.node->data_type->e.numeric.dimx) return false; - for (i = 0; i < instr->data_type->dimx; ++i) +- for (i = 0; i < instr->data_type->dimx; ++i) - if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i) ++ for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) + if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i) return false; hlsl_replace_node(instr, swizzle->val.node); -@@ -2788,6 +2827,108 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n +@@ -2589,6 +2639,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + { + struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; ++ unsigned int width = type->e.numeric.dimx; + struct hlsl_constant_value value; + struct hlsl_ir_load *vector_load; + enum hlsl_ir_expr_op op; +@@ -2597,7 +2648,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + return false; + hlsl_block_add_instr(block, &vector_load->node); + +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) ++ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc))) + return false; + hlsl_block_add_instr(block, swizzle); + +@@ -2605,14 +2656,14 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + value.u[1].u = 1; + value.u[2].u = 2; + value.u[3].u = 3; +- if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) ++ if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc))) + return false; + 
hlsl_block_add_instr(block, c); + + operands[0] = swizzle; + operands[1] = c; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, +- hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) ++ hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, width), &instr->loc))) + return false; + hlsl_block_add_instr(block, eq); + +@@ -2621,7 +2672,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + hlsl_block_add_instr(block, eq); + + op = HLSL_OP2_DOT; +- if (type->dimx == 1) ++ if (width == 1) + op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + + /* Note: We may be creating a DOT for bool vectors here, which we need to lower to +@@ -2748,7 +2799,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n + return false; + hlsl_block_add_instr(block, equals); + +- if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) ++ if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), ++ var->data_type->e.numeric.dimx, equals, &cut_index->loc))) + return false; + hlsl_block_add_instr(block, equals); + +@@ -2788,6 +2840,108 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n return true; } @@ -4207,7 +6116,18 @@ index d11ff481f6b..a43ea53089e 100644 /* Lower combined samples and sampler variables to synthesized separated textures and samplers. * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2899,6 +3040,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl +@@ -2808,6 +2962,10 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_ALPHA: ++ case HLSL_RESOURCE_GATHER_CMP_RED: ++ case HLSL_RESOURCE_GATHER_CMP_GREEN: ++ case HLSL_RESOURCE_GATHER_CMP_BLUE: ++ case HLSL_RESOURCE_GATHER_CMP_ALPHA: + case HLSL_RESOURCE_RESINFO: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: +@@ -2899,6 +3057,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl list_add_tail(list, &to_add->extern_entry); } @@ -4235,7 +6155,278 @@ index d11ff481f6b..a43ea53089e 100644 static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) { struct list separated_resources; -@@ -4162,9 +4324,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -3010,7 +3189,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h + arg2 = expr->operands[1].node; + if (expr->op != HLSL_OP2_DOT) + return false; +- if (arg1->data_type->dimx != 2) ++ if (arg1->data_type->e.numeric.dimx != 2) + return false; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) +@@ -3034,11 +3213,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h + return false; + hlsl_block_add_instr(block, mul); + +- if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) ++ if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), ++ instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) + return false; + hlsl_block_add_instr(block, add_x); + +- if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) ++ if (!(add_y = 
hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), ++ instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) + return false; + hlsl_block_add_instr(block, add_y); + +@@ -3202,7 +3383,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + type = arg->data_type; + + /* Reduce the range of the input angles to [-pi, pi]. */ +- for (i = 0; i < type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimx; ++i) + { + half_value.u[i].f = 0.5; + two_pi_value.u[i].f = 2.0 * M_PI; +@@ -3230,7 +3411,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + return false; + hlsl_block_add_instr(block, reduced); + +- if (type->dimx == 1) ++ if (type->e.numeric.dimx == 1) + { + if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) + return false; +@@ -3243,7 +3424,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + struct hlsl_deref var_deref; + struct hlsl_ir_load *var_load; + +- for (i = 0; i < type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimx; ++i) + { + uint32_t s = hlsl_swizzle_from_writemask(1 << i); + +@@ -3256,7 +3437,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + +- for (i = 0; i < type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimx; ++i) + { + struct hlsl_block store_block; + +@@ -3292,7 +3473,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st + return false; + + arg = expr->operands[0].node; +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); + + /* If this is happens, it means we failed to cast the argument to boolean somewhere. 
*/ + VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); +@@ -3354,7 +3535,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, +- instr->data_type->dimx, instr->data_type->dimy); ++ instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy); + + if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) + return false; +@@ -3375,6 +3556,51 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return true; + } + ++static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *swizzle, *store; ++ struct hlsl_ir_resource_load *load; ++ struct hlsl_ir_load *tmp_load; ++ struct hlsl_ir_var *tmp_var; ++ struct hlsl_deref deref; ++ ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ load = hlsl_ir_resource_load(instr); ++ if (load->load_type != HLSL_RESOURCE_SAMPLE_LOD ++ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) ++ return false; ++ ++ if (!load->lod.node) ++ return false; ++ ++ if (!(tmp_var = hlsl_new_synthetic_var(ctx, "coords-with-lod", ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &instr->loc))) ++ return false; ++ ++ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc))) ++ return false; ++ list_add_before(&instr->entry, &swizzle->entry); ++ ++ if (!(store = hlsl_new_simple_store(ctx, tmp_var, swizzle))) ++ return false; ++ list_add_before(&instr->entry, &store->entry); ++ ++ hlsl_init_simple_deref_from_var(&deref, tmp_var); ++ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load->coords.node, 0, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &store->entry); ++ ++ if (!(tmp_load = hlsl_new_var_load(ctx, tmp_var, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &tmp_load->node.entry); ++ ++ hlsl_src_remove(&load->coords); ++ hlsl_src_from_node(&load->coords, &tmp_load->node); ++ hlsl_src_remove(&load->lod); ++ return true; ++} ++ + static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_block *block) + { +@@ -3393,7 +3619,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; +@@ -3519,7 +3745,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; +@@ -3579,7 +3805,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h + if (expr->op != HLSL_OP3_CMP) + return false; + +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); + + for (i = 0; i < 3; ++i) + { 
+@@ -3649,7 +3875,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return false; + + /* Narrowing casts should have already been lowered. */ +- VKD3D_ASSERT(type->dimx == arg_type->dimx); ++ VKD3D_ASSERT(type->e.numeric.dimx == arg_type->e.numeric.dimx); + + zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); + if (!zero) +@@ -3675,7 +3901,8 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc + + if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) + { +- cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, ++ cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); + + if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) + return NULL; +@@ -3711,13 +3938,13 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return false; + if (type->e.numeric.type != HLSL_TYPE_INT) + return false; +- utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); ++ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + + if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) + return false; + hlsl_block_add_instr(block, xor); + +- for (i = 0; i < type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimx; ++i) + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; +@@ -3777,9 +4004,9 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return false; + if (type->e.numeric.type != HLSL_TYPE_INT) + return false; +- utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); ++ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); + +- for (i = 0; i < type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimx; ++i) + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; +@@ -3870,8 +4097,8 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + { + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; +- VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); +- dimx = arg1->data_type->dimx; ++ VKD3D_ASSERT(arg1->data_type->e.numeric.dimx == arg2->data_type->e.numeric.dimx); ++ dimx = arg1->data_type->e.numeric.dimx; + is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; + + if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) +@@ -3920,7 +4147,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return false; + if (type->e.numeric.type != HLSL_TYPE_FLOAT) + return false; +- btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); ++ btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); + + if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) + return false; +@@ -3942,7 +4169,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) + return false; + +- for (i = 0; i < type->dimx; ++i) ++ for (i = 0; i < type->e.numeric.dimx; ++i) + one_value.u[i].f = 1.0f; + if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) + return false; +@@ -4000,7 +4227,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + if (!arg) + continue; + +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); + if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg_cast); +@@ -4008,7 +4235,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + operands[i] = arg_cast; + } + +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); + if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, float_expr); +@@ -4049,7 +4276,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + + operands[0] = jump->condition.node; + operands[1] = zero; +- cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); ++ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, ++ arg_type->e.numeric.dimx, arg_type->e.numeric.dimy); + if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) + return false; + hlsl_block_add_instr(&block, cmp); +@@ -4093,7 +4321,7 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v + return false; + + cond = jump->condition.node; +- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->e.numeric.dimx); + + hlsl_block_init(&block); + +@@ -4162,9 +4390,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); @@ -4245,7 +6436,7 @@ index d11ff481f6b..a43ea53089e 100644 } return false; -@@ -4304,9 +4463,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -4304,9 +4529,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. 
*/ vkd3d_unreachable(); @@ -4255,7 +6446,7 @@ index d11ff481f6b..a43ea53089e 100644 case HLSL_IR_STORE: { -@@ -4494,6 +4650,9 @@ struct register_allocator +@@ -4494,6 +4716,9 @@ struct register_allocator /* Two allocations with different mode can't share the same register. */ int mode; @@ -4265,7 +6456,7 @@ index d11ff481f6b..a43ea53089e 100644 } *allocations; size_t count, capacity; -@@ -4513,7 +4672,7 @@ struct register_allocator +@@ -4513,7 +4738,7 @@ struct register_allocator }; static unsigned int get_available_writemask(const struct register_allocator *allocator, @@ -4274,7 +6465,7 @@ index d11ff481f6b..a43ea53089e 100644 { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; -@@ -4532,6 +4691,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all +@@ -4532,6 +4757,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all writemask &= ~allocation->writemask; if (allocation->mode != mode) writemask = 0; @@ -4283,7 +6474,7 @@ index d11ff481f6b..a43ea53089e 100644 } if (!writemask) -@@ -4542,7 +4703,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all +@@ -4542,7 +4769,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all } static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, @@ -4292,7 +6483,7 @@ index d11ff481f6b..a43ea53089e 100644 { struct allocation *allocation; -@@ -4556,16 +4717,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a +@@ -4556,16 +4783,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation->first_write = first_write; allocation->last_read = last_read; allocation->mode = mode; @@ -4322,7 +6513,7 @@ index d11ff481f6b..a43ea53089e 100644 { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 
4 : reg_size; -@@ -4579,7 +4749,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a +@@ -4579,7 +4815,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { unsigned int available_writemask = get_available_writemask(allocator, @@ -4331,7 +6522,7 @@ index d11ff481f6b..a43ea53089e 100644 if (vkd3d_popcount(available_writemask) >= pref) { -@@ -4589,7 +4759,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a +@@ -4589,7 +4825,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); @@ -4341,7 +6532,7 @@ index d11ff481f6b..a43ea53089e 100644 return ret; } } -@@ -4598,13 +4769,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a +@@ -4598,13 +4835,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, @@ -4359,7 +6550,7 @@ index d11ff481f6b..a43ea53089e 100644 { struct hlsl_reg ret = {0}; uint32_t reg_idx; -@@ -4614,11 +4786,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct +@@ -4614,11 +4852,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct for (reg_idx = 0;; ++reg_idx) { if ((get_available_writemask(allocator, first_write, last_read, @@ -4373,7 +6564,7 @@ index d11ff481f6b..a43ea53089e 100644 ret.id = reg_idx; ret.allocation_size = 1; -@@ -4628,7 +4800,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct +@@ -4628,7 +4866,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct } static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, @@ -4382,7 +6573,7 @@ index d11ff481f6b..a43ea53089e 100644 { unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; unsigned int writemask; -@@ -4636,18 +4808,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig +@@ -4636,18 +4874,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig for (i = 0; i < (reg_size / 4); ++i) { @@ -4404,7 +6595,7 @@ index d11ff481f6b..a43ea53089e 100644 { struct hlsl_reg ret = {0}; uint32_t reg_idx; -@@ -4655,15 +4827,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo +@@ -4655,15 +4893,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo for (reg_idx = 0;; ++reg_idx) { @@ -4423,19 +6614,20 @@ index d11ff481f6b..a43ea53089e 100644 ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; -@@ -4679,9 +4851,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, +@@ -4679,9 +4917,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); ++ return allocate_register(ctx, allocator, first_write, last_read, ++ type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false); else - return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); + return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -4859,8 +5031,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, +@@ -4859,8 +5098,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, } if (reg_writemask) @@ -4446,7 +6638,33 @@ index d11ff481f6b..a43ea53089e 100644 else instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); -@@ -5084,7 +5256,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -5006,13 +5245,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + + VKD3D_ASSERT(hlsl_is_numeric_type(type)); +- VKD3D_ASSERT(type->dimy == 1); ++ VKD3D_ASSERT(type->e.numeric.dimy == 1); + VKD3D_ASSERT(constant->reg.writemask); + + for (x = 0, i = 0; x < 4; ++x) + { + const union hlsl_constant_value_component *value; +- float f; ++ float f = 0; + + if (!(constant->reg.writemask & (1u << x))) + continue; +@@ -5040,9 +5279,6 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; +- +- default: +- vkd3d_unreachable(); + } + + record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); +@@ -5084,7 +5320,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, } } @@ -4455,7 +6673,7 @@ index d11ff481f6b..a43ea53089e 100644 { struct hlsl_ir_var *var; -@@ -5092,8 +5264,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ +@@ -5092,8 +5328,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) { @@ -4466,7 +6684,7 @@ index d11ff481f6b..a43ea53089e 100644 if (to_sort_size > var_size) { -@@ -5105,7 +5277,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ +@@ -5105,7 +5341,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ list_add_tail(sorted, &to_sort->extern_entry); } @@ -4475,7 +6693,7 @@ index d11ff481f6b..a43ea53089e 100644 { struct list sorted = LIST_INIT(sorted); struct hlsl_ir_var *var, *next; -@@ -5113,7 +5285,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) +@@ -5113,7 +5349,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform) @@ -4484,7 +6702,7 @@ index d11ff481f6b..a43ea53089e 100644 } list_move_tail(&ctx->extern_vars, &sorted); } -@@ -5161,7 +5333,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5161,7 +5397,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var; @@ 
-4493,7 +6711,7 @@ index d11ff481f6b..a43ea53089e 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { -@@ -5181,14 +5353,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5181,14 +5417,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (i < bind_count) { @@ -4512,7 +6730,7 @@ index d11ff481f6b..a43ea53089e 100644 } var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; -@@ -5211,7 +5384,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5211,7 +5448,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { @@ -4521,7 +6739,7 @@ index d11ff481f6b..a43ea53089e 100644 TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } -@@ -5254,7 +5427,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun +@@ -5254,7 +5491,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { @@ -4531,7 +6749,17 @@ index d11ff481f6b..a43ea53089e 100644 break; } } -@@ -5311,6 +5485,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5266,7 +5504,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + return allocator.reg_count; + } + +-enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) ++static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, ++ unsigned int storage_modifiers) + { + unsigned int i; + +@@ -5311,6 +5550,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_register_type type; struct vkd3d_shader_version version; @@ -4540,7 +6768,7 @@ index d11ff481f6b..a43ea53089e 100644 uint32_t reg; bool builtin; -@@ -5363,6 +5539,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5363,6 +5604,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var * domains, it is allocated as if it was 'float[1]'. */ var->force_align = true; } @@ -4555,21 +6783,45 @@ index d11ff481f6b..a43ea53089e 100644 } if (builtin) -@@ -5376,8 +5560,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5374,10 +5623,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + { + int mode = (ctx->profile->major_version < 4) ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - unsigned int reg_size = optimize ? var->data_type->dimx : 4; +- unsigned int reg_size = optimize ? var->data_type->dimx : 4; ++ unsigned int reg_size = optimize ? var->data_type->e.numeric.dimx : 4; ++ ++ if (special_interpolation) ++ mode = VKD3DSIM_NONE; - var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, - UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); -+ if (special_interpolation) -+ mode = VKD3DSIM_NONE; -+ + var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, -+ reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); ++ reg_size, var->data_type->e.numeric.dimx, mode, var->force_align, vip_allocation); TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 
'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); -@@ -6419,6 +6606,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +@@ -5831,7 +6083,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- if (idx >= type->dimx) ++ if (idx >= type->e.numeric.dimx) + return false; + *start += idx; + break; +@@ -5840,9 +6092,9 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + if (idx >= hlsl_type_major_size(type)) + return false; + if (hlsl_type_is_row_major(type)) +- *start += idx * type->dimx; ++ *start += idx * type->e.numeric.dimx; + else +- *start += idx * type->dimy; ++ *start += idx * type->e.numeric.dimy; + break; + + case HLSL_CLASS_ARRAY: +@@ -6419,6 +6671,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); @@ -4577,7 +6829,63 @@ index d11ff481f6b..a43ea53089e 100644 progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); progress |= hlsl_copy_propagation_execute(ctx, body); progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -@@ -6636,7 +6824,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d +@@ -6430,8 +6683,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, + struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) + { ++ enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID; + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; +- enum vkd3d_shader_component_type component_type; + unsigned int register_index, mask, use_mask; + const char *name = var->semantic.name; + enum vkd3d_shader_register_type type; +@@ -6451,7 +6704,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) + { + register_index = has_idx ? 
var->semantic.index : ~0u; +- mask = (1u << var->data_type->dimx) - 1; ++ mask = (1u << var->data_type->e.numeric.dimx) - 1; + } + else + { +@@ -6478,12 +6731,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + component_type = VKD3D_SHADER_COMPONENT_UINT; + break; + +- default: ++ case HLSL_TYPE_DOUBLE: + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); +- component_type = VKD3D_SHADER_COMPONENT_VOID; + break; + } + +@@ -6519,19 +6771,19 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + sysval = VKD3D_SHADER_SV_POSITION; + } + +- mask = (1 << var->data_type->dimx) - 1; ++ mask = (1 << var->data_type->e.numeric.dimx) - 1; + + if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output + && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + { +- if (var->data_type->dimx > 1) ++ if (var->data_type->e.numeric.dimx > 1) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "PSIZE output must have only 1 component in this shader model."); + /* For some reason the writemask has all components set. */ + mask = VKD3DSP_WRITEMASK_ALL; + } + if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 +- && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) ++ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->e.numeric.dimx > 1) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "FOG output must have only 1 component in this shader model."); + +@@ -6636,7 +6888,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d swizzle = hlsl_swizzle_from_writemask(src_writemask); swizzle = hlsl_map_swizzle(swizzle, dst_writemask); @@ -4585,7 +6893,7 @@ index d11ff481f6b..a43ea53089e 100644 return swizzle; } -@@ -6812,7 +6999,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src +@@ -6812,7 +7063,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src } static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, @@ -4594,22 +6902,61 @@ index d11ff481f6b..a43ea53089e 100644 { struct hlsl_ir_constant *constant; -@@ -6832,6 +7019,242 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, +@@ -6821,7 +7072,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, + /* In SM4 constants are inlined */ + constant = hlsl_ir_constant(instr); + vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, +- vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); ++ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->e.numeric.dimx, map_writemask); + } + else + { +@@ -6832,89 +7083,325 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } } +-static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, +- struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +-{ +- VKD3D_ASSERT(instr->reg.allocated); +- vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); +- dst->reg.idx[0].offset = instr->reg.id; +- dst->reg.dimension = VSIR_DIMENSION_VEC4; +- dst->write_mask = instr->reg.writemask; +-} +- +-static void sm1_generate_vsir_instr_constant(struct 
hlsl_ctx *ctx, +- struct vsir_program *program, struct hlsl_ir_constant *constant) +static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) -+{ + { +- struct hlsl_ir_node *instr = &constant->node; +- struct vkd3d_shader_dst_param *dst_param; +- struct vkd3d_shader_src_param *src_param; +- struct vkd3d_shader_instruction *ins; +- +- VKD3D_ASSERT(instr->reg.allocated); +- VKD3D_ASSERT(constant->reg.allocated); + const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref; -+ + +- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) +- return; + reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; -+ + +- src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = constant->reg.id; +- src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; +-} + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); @@ -4621,11 +6968,20 @@ index d11ff481f6b..a43ea53089e 100644 + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; -+ + +-static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, +- struct vsir_program *program, struct hlsl_ir_expr *expr) +-{ +- struct vkd3d_shader_src_param *src_param; +- struct hlsl_ir_node *instr = &expr->node; +- struct vkd3d_shader_instruction *ins; + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; -+ + +- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) +- return; +- ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + if (!(idx_src = vsir_program_get_src_params(program, 1))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; @@ -4633,28 +6989,55 @@ index d11ff481f6b..a43ea53089e 100644 + } + memset(idx_src, 0, sizeof(*idx_src)); + reg->idx[1].rel_addr = idx_src; -+ + +- vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); + } + } -+ + +- src_param = &ins->src[0]; +- vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); +- src_param->reg.dimension = VSIR_DIMENSION_VEC4; +- src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + return true; -+} -+ + } + +-/* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ +-static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, +- struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, +- uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) +static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) -+{ + { +- struct hlsl_ir_node *instr = &expr->node; +- struct vkd3d_shader_dst_param *dst_param; +- struct vkd3d_shader_src_param *src_param; +- struct vkd3d_shader_instruction *ins; +- unsigned int i, src_count = 0; +- +- VKD3D_ASSERT(instr->reg.allocated); + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; -+ + +- for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + if (var->is_uniform) -+ { + { +- if (expr->operands[i].node) +- src_count = i + 1; +- } +- VKD3D_ASSERT(!src_mod || src_count == 1); +- +- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) +- return; + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -+ + +- dst_param = &ins->dst[0]; +- vsir_dst_from_hlsl_node(dst_param, ctx, instr); +- dst_param->modifiers = dst_mod; + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3DSPR_RESOURCE; @@ -4715,7 +7098,9 @@ index d11ff481f6b..a43ea53089e 100644 + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -+ + +- for (i = 0; i < src_count; ++i) +- { + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; @@ -4732,7 +7117,7 @@ index d11ff481f6b..a43ea53089e 100644 + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } -+ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); ++ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) @@ -4753,7 +7138,7 @@ index d11ff481f6b..a43ea53089e 100644 + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4); + } + else + { @@ -4789,7 +7174,7 @@ index d11ff481f6b..a43ea53089e 100644 + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4); + } + else + { @@ -4834,10 +7219,112 @@ index d11ff481f6b..a43ea53089e 100644 + return true; +} + - static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, - struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) - { -@@ -7059,13 +7482,10 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, ++static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, ++ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) ++{ ++ VKD3D_ASSERT(instr->reg.allocated); ++ vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); ++ dst->reg.idx[0].offset = instr->reg.id; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = instr->reg.writemask; ++} ++ ++static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_constant *constant) ++{ ++ struct hlsl_ir_node 
*instr = &constant->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(instr->reg.allocated); ++ VKD3D_ASSERT(constant->reg.allocated); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return; ++ ++ src_param = &ins->src[0]; ++ vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.idx[0].offset = constant->reg.id; ++ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); ++ ++ dst_param = &ins->dst[0]; ++ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ dst_param->reg.idx[0].offset = instr->reg.id; ++ dst_param->write_mask = instr->reg.writemask; ++} ++ ++static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr) ++{ ++ struct vkd3d_shader_src_param *src_param; ++ struct hlsl_ir_node *instr = &expr->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) ++ return; ++ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ src_param = &ins->src[0]; ++ vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++} ++ ++/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ ++static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, ++ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) ++{ ++ struct hlsl_ir_node *instr = &expr->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int i, src_count = 0; ++ ++ VKD3D_ASSERT(instr->reg.allocated); ++ ++ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) ++ { ++ if (expr->operands[i].node) ++ src_count = i + 1; ++ } ++ VKD3D_ASSERT(!src_mod || src_count == 1); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) ++ return; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ dst_param->modifiers = dst_mod; ++ ++ for (i = 0; i < src_count; ++i) ++ { + struct hlsl_ir_node *operand = expr->operands[i].node; + + src_param = &ins->src[i]; +@@ -7014,7 +7501,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + dst_type = instr->data_type; + + /* Narrowing casts were already lowered. 
*/ +- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); ++ VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx); + + switch (dst_type->e.numeric.type) + { +@@ -7040,9 +7527,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "The 'double' type is not supported for the %s profile.", ctx->profile->name); + break; +- +- default: +- vkd3d_unreachable(); + } + break; + +@@ -7059,19 +7543,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_INT: case HLSL_TYPE_UINT: @@ -4852,18 +7339,68 @@ index d11ff481f6b..a43ea53089e 100644 case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); break; -@@ -7472,9 +7892,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, +- +- default: +- vkd3d_unreachable(); + } + break; + +@@ -7096,7 +7574,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. */ +- default: + hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", + debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); + break; +@@ -7178,7 +7655,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP2_DOT: +- switch (expr->operands[0].node->data_type->dimx) ++ switch (expr->operands[0].node->data_type->e.numeric.dimx) + { + case 3: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); +@@ -7276,7 +7753,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, + register_index = reg.id; + } + else +- writemask = (1u << deref->var->data_type->dimx) - 1; ++ writemask = (1u << deref->var->data_type->e.numeric.dimx) - 1; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") + || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) +@@ -7334,7 +7811,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, false, &type, ®ister_index)) + { +- writemask = (1 << deref->var->data_type->dimx) - 1; ++ writemask = (1 << deref->var->data_type->e.numeric.dimx) - 1; + } + else + { +@@ -7472,9 +7949,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, dst_param->write_mask = instr->reg.writemask; swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); - swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); -+ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->dimx); ++ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); - swizzle = vsir_swizzle_from_hlsl(swizzle); src_param = &ins->src[0]; VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); -@@ -7624,31 +8043,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo +@@ -7539,7 +8015,7 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + return; + } +- VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); ++ VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1); + + if (!(ins = 
generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) + return; +@@ -7624,31 +8100,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo } static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, @@ -4897,7 +7434,7 @@ index d11ff481f6b..a43ea53089e 100644 generate_vsir_signature(ctx, program, entry_func); hlsl_block_init(&block); -@@ -7659,38 +8067,401 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl +@@ -7659,66 +8124,434 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); } @@ -4914,8 +7451,6 @@ index d11ff481f6b..a43ea53089e 100644 { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -- } -- hlsl_block_add_instr(block, vsir_instr); + case HLSL_CLASS_ARRAY: + return hlsl_sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: @@ -4955,7 +7490,8 @@ index d11ff481f6b..a43ea53089e 100644 + case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; -+ } + } +- hlsl_block_add_instr(block, vsir_instr); + + vkd3d_unreachable(); } @@ -4977,6 +7513,8 @@ index d11ff481f6b..a43ea53089e 100644 + switch (class) { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; +- } + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: @@ -4999,10 +7537,12 @@ index d11ff481f6b..a43ea53089e 100644 + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; -+ default: -+ vkd3d_unreachable(); + } -+ ++ break; + +- list_add_before(&instr->entry, &vsir_instr->entry); +- hlsl_replace_node(instr, vsir_instr); +-} + case HLSL_CLASS_SAMPLER: + switch (type->sampler_dim) + { @@ -5021,7 +7561,21 @@ index d11ff481f6b..a43ea53089e 100644 + vkd3d_unreachable(); + } + break; -+ + +-static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, +- const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, +- const struct vkd3d_shader_location *loc) +-{ +- const struct vkd3d_shader_version *version = &program->shader_version; +- const bool output = var->is_output_semantic; +- enum vkd3d_shader_sysval_semantic semantic; +- struct vkd3d_shader_dst_param *dst_param; +- struct vkd3d_shader_instruction *ins; +- enum vkd3d_shader_register_type type; +- enum vkd3d_shader_opcode opcode; +- unsigned int idx = 0; +- uint32_t write_mask; +- bool has_idx; + case HLSL_CLASS_TEXTURE: + switch (type->sampler_dim) + { @@ -5040,10 +7594,20 @@ index d11ff481f6b..a43ea53089e 100644 + vkd3d_unreachable(); + } + break; -+ + +- sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, +- ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); +- if (semantic == ~0u) +- semantic = VKD3D_SHADER_SV_NONE; + case HLSL_CLASS_ARRAY: + return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler); -+ + +- if (var->is_input_semantic) +- { +- switch (semantic) +- { +- case VKD3D_SHADER_SV_NONE: +- opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + case HLSL_CLASS_STRUCT: + return D3DXPT_VOID; + @@ -5086,16 +7650,15 @@ index d11ff481f6b..a43ea53089e 100644 + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + unsigned int array_size = hlsl_get_multiarray_size(type); + struct hlsl_struct_field *field; -+ unsigned int field_count = 0; -+ size_t fields_offset = 0; + size_t i; + + if (type->bytecode_offset) - return; ++ return; + + if 
(array_type->class == HLSL_CLASS_STRUCT) + { -+ field_count = array_type->e.record.field_count; ++ unsigned int field_count = array_type->e.record.field_count; ++ size_t fields_offset; + + for (i = 0; i < field_count; ++i) + { @@ -5112,13 +7675,23 @@ index d11ff481f6b..a43ea53089e 100644 + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } -+ } + -+ type->bytecode_offset = put_u32(buffer, -+ vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); -+ put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); ++ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3DXPC_STRUCT, D3DXPT_VOID)); ++ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); ++ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); ++ put_u32(buffer, fields_offset); ++ } ++ else ++ { ++ type->bytecode_offset = put_u32(buffer, ++ vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); ++ if (hlsl_is_numeric_type(array_type)) ++ put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); ++ else ++ put_u32(buffer, vkd3d_make_u32(1, 1)); ++ put_u32(buffer, vkd3d_make_u32(array_size, 0)); ++ put_u32(buffer, 1); ++ } +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) @@ -5260,7 +7833,7 @@ index d11ff481f6b..a43ea53089e 100644 + { + uint32_t u; + float f; -+ } uni; ++ } uni = {0}; + + switch (comp_type->e.numeric.type) + { @@ -5284,9 +7857,6 @@ index d11ff481f6b..a43ea53089e 100644 + case HLSL_TYPE_FLOAT: + uni.u = var->default_values[k].number.u; + break; -+ -+ default: -+ vkd3d_unreachable(); + } + + set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); @@ -5296,10 +7866,8 @@ index d11ff481f6b..a43ea53089e 100644 + + ++uniform_count; + } - } - -- list_add_before(&instr->entry, &vsir_instr->entry); -- hlsl_replace_node(instr, vsir_instr); ++ } ++ + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); +} @@ -5317,10 +7885,47 @@ index d11ff481f6b..a43ea53089e 100644 + } + ctab->code = buffer.data; + ctab->size = buffer.size; - } ++} ++ ++static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, ++ const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, ++ const struct vkd3d_shader_location *loc) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const bool output = var->is_output_semantic; ++ enum vkd3d_shader_sysval_semantic semantic; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_register_type type; ++ enum vkd3d_shader_opcode opcode; ++ unsigned int idx = 0; ++ uint32_t write_mask; ++ bool has_idx; ++ ++ sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ++ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); ++ if (semantic == ~0u) ++ semantic = VKD3D_SHADER_SV_NONE; ++ ++ if (var->is_input_semantic) ++ { ++ switch (semantic) ++ { ++ case VKD3D_SHADER_SV_NONE: ++ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? 
VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; + break; - static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -7806,8 +8577,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs +@@ -7749,7 +8582,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + { + if (has_idx) + idx = var->semantic.index; +- write_mask = (1u << var->data_type->dimx) - 1; ++ write_mask = (1u << var->data_type->e.numeric.dimx) - 1; + } + else + { +@@ -7806,8 +8639,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); @@ -5329,7 +7934,7 @@ index d11ff481f6b..a43ea53089e 100644 } static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -7819,8 +8588,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ +@@ -7819,8 +8650,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ return; ins->declaration.count = temp_count; @@ -5338,7 +7943,7 @@ index d11ff481f6b..a43ea53089e 100644 } static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, -@@ -7838,8 +8605,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, +@@ -7838,8 +8667,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; ins->declaration.indexable_temp.component_count = comp_count; ins->declaration.indexable_temp.has_function_scope = false; @@ -5347,7 +7952,77 @@ index d11ff481f6b..a43ea53089e 100644 } static bool type_is_float(const struct hlsl_type *type) -@@ -8505,59 +9270,690 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, +@@ -7891,7 +8718,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + } one = { .f = 1.0 }; + + /* Narrowing casts were already lowered. */ +- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); ++ VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx); + + switch (dst_type->e.numeric.type) + { +@@ -7919,9 +8746,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + return false; +- +- default: +- vkd3d_unreachable(); + } + break; + +@@ -7945,9 +8769,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + return false; +- +- default: +- vkd3d_unreachable(); + } + break; + +@@ -7971,9 +8792,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + return false; +- +- default: +- vkd3d_unreachable(); + } + break; + +@@ -7983,9 +8801,10 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. 
*/ +- default: +- vkd3d_unreachable(); ++ break; + } ++ ++ vkd3d_unreachable(); + } + + static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, +@@ -8040,7 +8859,7 @@ static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, + value.u[2].f = 1.0f; + value.u[3].f = 1.0f; + vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, +- VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); ++ VKD3D_DATA_FLOAT, instr->data_type->e.numeric.dimx, dst_param->write_mask); + + vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); + } +@@ -8270,7 +9089,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: +- switch (expr->operands[0].node->data_type->dimx) ++ switch (expr->operands[0].node->data_type->e.numeric.dimx) + { + case 4: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); +@@ -8505,188 +9324,1963 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } } @@ -5427,28 +8102,40 @@ index d11ff481f6b..a43ea53089e 100644 - case HLSL_IR_LOOP: - sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); +- break; + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) + { + /* Uniform bools can be specified as anything, but internal bools + * always have 0 for false and ~0 for true. Normalise that here. */ -+ + +- case HLSL_IR_SWITCH: +- LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) +- sm4_generate_vsir_block(ctx, &c->body, program); +- break; + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) + return false; -+ + +- case HLSL_IR_SWIZZLE: +- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); +- replace_instr_with_last_vsir_instr(ctx, program, instr); +- break; + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ + +- default: +- break; +- } + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; + + memset(&value, 0xff, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, -+ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask); + memset(&value, 0x00, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, -+ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask); + } + else + { @@ -5491,7 +8178,7 @@ index d11ff481f6b..a43ea53089e 100644 + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) + return false; + -+ writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); ++ writemask = vkd3d_write_mask_from_component_count(value->data_type->e.numeric.dimx); + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, writemask)) + return false; @@ -5523,9 +8210,9 @@ index d11ff481f6b..a43ea53089e 100644 + + if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) + return false; -+ if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) ++ if (offset->node.data_type->e.numeric.dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i 
> 7)) + return false; -+ if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) ++ if (offset->node.data_type->e.numeric.dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) + return false; + return true; +} @@ -5542,9 +8229,9 @@ index d11ff481f6b..a43ea53089e 100644 + ins->texel_offset.u = offset->value.u[0].i; + ins->texel_offset.v = 0; + ins->texel_offset.w = 0; -+ if (offset->node.data_type->dimx > 1) ++ if (offset->node.data_type->e.numeric.dimx > 1) + ins->texel_offset.v = offset->value.u[1].i; -+ if (offset->node.data_type->dimx > 2) ++ if (offset->node.data_type->e.numeric.dimx > 2) + ins->texel_offset.w = offset->value.u[2].i; +} + @@ -5711,18 +8398,18 @@ index d11ff481f6b..a43ea53089e 100644 +} + +static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct hlsl_ir_resource_load *load, uint32_t swizzle) ++ const struct hlsl_ir_resource_load *load, uint32_t swizzle, bool compare) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; ++ enum vkd3d_shader_opcode opcode = VKD3DSIH_GATHER4; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; ++ unsigned int src_count = 3, current_arg = 0; + struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; + -+ opcode = VKD3DSIH_GATHER4; + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + if (!vkd3d_shader_ver_ge(version, 5, 0)) @@ -5732,50 +8419,40 @@ index d11ff481f6b..a43ea53089e 100644 + return false; + } + opcode = VKD3DSIH_GATHER4_PO; ++ ++src_count; + } + -+ if (opcode == VKD3DSIH_GATHER4) ++ if (compare) + { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) -+ return false; ++ opcode = opcode == VKD3DSIH_GATHER4 ? 
VKD3DSIH_GATHER4_C : VKD3DSIH_GATHER4_PO_C; ++ ++src_count; ++ } + -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) ++ return false; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ ++ if (opcode == VKD3DSIH_GATHER4_PO || opcode == VKD3DSIH_GATHER4_PO_C) ++ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); ++ else + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[current_arg++], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; + -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -+ return false; -+ ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[2].swizzle = swizzle; -+ } -+ else if (opcode == VKD3DSIH_GATHER4_PO) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) -+ return false; ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[current_arg], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ins->src[current_arg].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[current_arg].swizzle = swizzle; ++ current_arg++; + -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); ++ if (compare) ++ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, load->cmp.node, VKD3DSP_WRITEMASK_0); + -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -+ return false; -+ ins->src[3].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[3].swizzle = swizzle; -+ } -+ else -+ { -+ vkd3d_unreachable(); -+ } + return true; +} + @@ -5838,6 +8515,32 @@ index d11ff481f6b..a43ea53089e 100644 + return true; +} + ++static uint32_t get_gather_swizzle(enum hlsl_resource_load_type type) ++{ ++ switch (type) ++ { ++ case HLSL_RESOURCE_GATHER_RED: ++ case HLSL_RESOURCE_GATHER_CMP_RED: ++ return VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ++ case HLSL_RESOURCE_GATHER_GREEN: ++ case HLSL_RESOURCE_GATHER_CMP_GREEN: ++ return VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y); ++ ++ case HLSL_RESOURCE_GATHER_BLUE: ++ case HLSL_RESOURCE_GATHER_CMP_BLUE: ++ return VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); ++ ++ case HLSL_RESOURCE_GATHER_ALPHA: ++ case HLSL_RESOURCE_GATHER_CMP_ALPHA: ++ return VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ default: ++ return 0; ++ } ++ ++ return 0; ++} ++ +static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ @@ -5869,16 +8572,16 @@ index d11ff481f6b..a43ea53089e 100644 + return sm4_generate_vsir_instr_sample(ctx, program, load); + + case HLSL_RESOURCE_GATHER_RED: -+ return 
sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); -+ + case HLSL_RESOURCE_GATHER_GREEN: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); -+ + case HLSL_RESOURCE_GATHER_BLUE: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); -+ + case HLSL_RESOURCE_GATHER_ALPHA: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); ++ return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), false); ++ ++ case HLSL_RESOURCE_GATHER_CMP_RED: ++ case HLSL_RESOURCE_GATHER_CMP_GREEN: ++ case HLSL_RESOURCE_GATHER_CMP_BLUE: ++ case HLSL_RESOURCE_GATHER_CMP_ALPHA: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), true); + + case HLSL_RESOURCE_SAMPLE_INFO: + return sm4_generate_vsir_instr_sample_info(ctx, program, load); @@ -5932,7 +8635,7 @@ index d11ff481f6b..a43ea53089e 100644 + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; + -+ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); ++ VKD3D_ASSERT(iff->condition.node->data_type->e.numeric.dimx == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) + return; @@ -6061,25 +8764,38 @@ index d11ff481f6b..a43ea53089e 100644 + + case HLSL_IR_STORE: + sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWITCH: -- LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) -- sm4_generate_vsir_block(ctx, &c->body, program); ++ break; ++ ++ case HLSL_IR_SWITCH: + sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); - break; - - case HLSL_IR_SWIZZLE: - generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -- replace_instr_with_last_vsir_instr(ctx, program, instr); - break; - - default: -@@ -8582,42 +9978,330 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - return; - program->temp_count = max(program->temp_count, temp_count); - -- hlsl_block_init(&block); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); ++ break; ++ ++ default: ++ break; ++ } ++ } ++} ++ ++static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) ++{ ++ bool is_patch_constant_func = func == ctx->patch_constant_func; ++ struct hlsl_block block = {0}; ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ uint32_t temp_count; ++ ++ compute_liveness(ctx, func); ++ mark_indexable_vars(ctx, func); ++ temp_count = allocate_temp_registers(ctx, func); ++ if (ctx->result) ++ return; ++ program->temp_count = max(program->temp_count, temp_count); ++ + hlsl_block_init(&block); + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) @@ -6120,6 +8836,208 @@ index d11ff481f6b..a43ea53089e 100644 + generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); +} + ++static int sm4_compare_extern_resources(const void *a, const void *b) ++{ ++ const struct extern_resource *aa = a; ++ const struct extern_resource *bb = b; ++ int r; ++ ++ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) ++ return r; ++ ++ if ((r = vkd3d_u32_compare(aa->space, bb->space))) ++ return r; ++ ++ return vkd3d_u32_compare(aa->index, bb->index); ++} ++ ++static const char 
*string_skip_tag(const char *string)
++{
++    if (!strncmp(string, "<resource>", strlen("<resource>")))
++        return string + strlen("<resource>");
++    return string;
++}
++
++static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count)
++{
++    unsigned int i;
++
++    for (i = 0; i < count; ++i)
++    {
++        vkd3d_free(extern_resources[i].name);
++    }
++    vkd3d_free(extern_resources);
++}
++
++static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count)
++{
++    bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0;
++    struct extern_resource *extern_resources = NULL;
++    const struct hlsl_ir_var *var;
++    struct hlsl_buffer *buffer;
++    enum hlsl_regset regset;
++    size_t capacity = 0;
++    char *name;
++
++    *count = 0;
++
++    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
++    {
++        if (separate_components)
++        {
++            unsigned int component_count = hlsl_type_component_count(var->data_type);
++            unsigned int k, regset_offset;
++
++            for (k = 0; k < component_count; ++k)
++            {
++                struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k);
++                struct vkd3d_string_buffer *name_buffer;
++
++                if (!hlsl_type_is_resource(component_type))
++                    continue;
++
++                regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset);
++                if (regset_offset > var->regs[regset].allocation_size)
++                    continue;
++
++                if (!var->objects_usage[regset][regset_offset].used)
++                    continue;
++
++                if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
++                        &capacity, *count + 1, sizeof(*extern_resources))))
++                {
++                    sm4_free_extern_resources(extern_resources, *count);
++                    *count = 0;
++                    return NULL;
++                }
++
++                if (!(name_buffer = hlsl_component_to_string(ctx, var, k)))
++                {
++                    sm4_free_extern_resources(extern_resources, *count);
++                    *count = 0;
++                    return NULL;
++                }
++                if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer))))
++                {
++                    sm4_free_extern_resources(extern_resources, *count);
++                    *count = 0;
++                    hlsl_release_string_buffer(ctx, name_buffer);
++                    return NULL;
++                }
++                hlsl_release_string_buffer(ctx, name_buffer);
++
++                extern_resources[*count].var = NULL;
++                extern_resources[*count].buffer = NULL;
++
++                extern_resources[*count].name = name;
++                extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type;
++
++                extern_resources[*count].component_type = component_type;
++
++                extern_resources[*count].regset = regset;
++                extern_resources[*count].id = var->regs[regset].id;
++                extern_resources[*count].space = var->regs[regset].space;
++                extern_resources[*count].index = var->regs[regset].index + regset_offset;
++                extern_resources[*count].bind_count = 1;
++                extern_resources[*count].loc = var->loc;
++
++                ++*count;
++            }
++        }
++        else
++        {
++            unsigned int r;
++
++            if (!hlsl_type_is_resource(var->data_type))
++                continue;
++
++            for (r = 0; r <= HLSL_REGSET_LAST; ++r)
++            {
++                if (!var->regs[r].allocated)
++                    continue;
++
++                if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
++                        &capacity, *count + 1, sizeof(*extern_resources))))
++                {
++                    sm4_free_extern_resources(extern_resources, *count);
++                    *count = 0;
++                    return NULL;
++                }
++
++                if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name))))
++                {
++                    sm4_free_extern_resources(extern_resources, *count);
++                    *count = 0;
++                    return NULL;
++                }
++
++                extern_resources[*count].var = var;
++                extern_resources[*count].buffer = NULL;
++
++                extern_resources[*count].name = name;
++                /* For some reason 5.1 resources aren't marked as
++                 * user-packed, but 
cbuffers still are. */ ++ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) ++ && !!var->reg_reservation.reg_type; ++ ++ extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); ++ ++ extern_resources[*count].regset = r; ++ extern_resources[*count].id = var->regs[r].id; ++ extern_resources[*count].space = var->regs[r].space; ++ extern_resources[*count].index = var->regs[r].index; ++ extern_resources[*count].bind_count = var->bind_count[r]; ++ extern_resources[*count].loc = var->loc; ++ ++ ++*count; ++ } ++ } ++ } ++ ++ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (!buffer->reg.allocated) ++ continue; ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, ++ &capacity, *count + 1, sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ if (!(name = hlsl_strdup(ctx, buffer->name))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ extern_resources[*count].var = NULL; ++ extern_resources[*count].buffer = buffer; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; ++ ++ extern_resources[*count].component_type = NULL; ++ ++ extern_resources[*count].regset = HLSL_REGSET_NUMERIC; ++ extern_resources[*count].id = buffer->reg.id; ++ extern_resources[*count].space = buffer->reg.space; ++ extern_resources[*count].index = buffer->reg.index; ++ extern_resources[*count].bind_count = 1; ++ extern_resources[*count].loc = buffer->loc; ++ ++ ++*count; ++ } ++ ++ if (extern_resources) ++ qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); ++ ++ return extern_resources; ++} ++ +static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program) +{ + struct extern_resource *extern_resources; @@ -6289,15 +9207,13 @@ index d11ff481f6b..a43ea53089e 100644 + + case HLSL_TYPE_INT: + return VKD3D_DATA_INT; -+ break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return VKD3D_DATA_UINT; -+ -+ default: -+ vkd3d_unreachable(); + } ++ ++ vkd3d_unreachable(); +} + +static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, @@ -6316,41 +9232,21 @@ index d11ff481f6b..a43ea53089e 100644 + + VKD3D_ASSERT(resource->regset == regset); + VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); - -- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ + component_type = resource->component_type; + + for (i = 0; i < resource->bind_count; ++i) - { -- if ((var->is_input_semantic && var->last_read) -- || (var->is_output_semantic && var->first_write)) -- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); -- } ++ { + unsigned int array_first = resource->index + i; + unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ - -- if (temp_count) -- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ + if (resource->var && !resource->var->objects_usage[regset][i].used) + continue; - -- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -- { -- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ + if (uav) - { -- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) -- continue; -- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -- continue; -- -- if (var->indexable) ++ { + switch (component_type->sampler_dim) - { -- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -- -- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); ++ { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + opcode = VKD3DSIH_DCL_UAV_STRUCTURED; + break; @@ -6372,11 +9268,9 @@ index d11ff481f6b..a43ea53089e 100644 + default: + opcode = VKD3DSIH_DCL; + break; - } - } -- } - -- list_move_head(&func->body.instrs, &block.instrs); ++ } ++ } ++ + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; @@ -6386,14 +9280,12 @@ index d11ff481f6b..a43ea53089e 100644 + structured_resource = &ins->declaration.structured_resource; + dst_param = &semantic->resource.reg; + vsir_dst_param_init(dst_param, uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0); - -- hlsl_block_cleanup(&block); ++ + if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; + if (uav && component_type->e.resource.rasteriser_ordered) + ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW; - -- sm4_generate_vsir_block(ctx, &func->body, program); ++ + multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + @@ -6427,31 +9319,52 @@ index d11ff481f6b..a43ea53089e 100644 + + if (multisampled) + semantic->sample_count = component_type->sample_count; -+ } + } } - /* OBJECTIVE: Translate all the information from ctx and entry_func to the -@@ -8627,12 +10311,15 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - uint64_t config_flags, struct vsir_program *program) +-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, +- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) ++/* OBJECTIVE: Translate all the information from ctx and entry_func to the ++ * vsir_program, so it can be used as input to tpf_compile() without relying ++ * on ctx and entry_func. 
*/ ++static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ uint64_t config_flags, struct vsir_program *program) { - struct vkd3d_shader_version version = {0}; +- bool is_patch_constant_func = func == ctx->patch_constant_func; +- struct hlsl_block block = {0}; +- struct hlsl_scope *scope; +- struct hlsl_ir_var *var; +- uint32_t temp_count; ++ struct vkd3d_shader_version version = {0}; + struct extern_resource *extern_resources; + unsigned int extern_resources_count; + const struct hlsl_buffer *cbuffer; - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) +- compute_liveness(ctx, func); +- mark_indexable_vars(ctx, func); +- temp_count = allocate_temp_registers(ctx, func); +- if (ctx->result) ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; -@@ -8648,45 +10335,178 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - program->thread_group_size.y = ctx->thread_count[1]; - program->thread_group_size.z = ctx->thread_count[2]; - } +- program->temp_count = max(program->temp_count, temp_count); ++ } ++ ++ generate_vsir_signature(ctx, program, func); ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ generate_vsir_signature(ctx, program, ctx->patch_constant_func); ++ ++ if (version.type == VKD3D_SHADER_TYPE_COMPUTE) ++ { ++ program->thread_group_size.x = ctx->thread_count[0]; ++ program->thread_group_size.y = ctx->thread_count[1]; ++ program->thread_group_size.z = ctx->thread_count[2]; ++ } + else if (version.type == VKD3D_SHADER_TYPE_HULL) + { + program->input_control_point_count = 1; /* TODO: Obtain from InputPatch */ @@ -6485,31 +9398,559 @@ index d11ff481f6b..a43ea53089e 100644 + sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true); + } + sm4_free_extern_resources(extern_resources, extern_resources_count); - ++ + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); - sm4_generate_vsir_add_function(ctx, func, config_flags, program); - if (version.type == VKD3D_SHADER_TYPE_HULL) ++ sm4_generate_vsir_add_function(ctx, func, config_flags, program); ++ if (version.type == VKD3D_SHADER_TYPE_HULL) + { + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); - sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); ++ sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + } + + generate_vsir_scan_required_features(ctx, program); + generate_vsir_scan_global_flags(ctx, program, func); ++} ++ ++/* For some reason, for matrices, values from default value initializers end ++ * up in different components than from regular initializers. Default value ++ * initializers fill the matrix in vertical reading order ++ * (left-to-right top-to-bottom) instead of regular reading order ++ * (top-to-bottom left-to-right), so they have to be adjusted. 
An exception is ++ * that the order of matrix initializers for function parameters are row-major ++ * (top-to-bottom left-to-right). */ ++static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) ++{ ++ unsigned int element_comp_count, element, x, y, i; ++ unsigned int base = 0; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_MATRIX: ++ x = index / type->e.numeric.dimy; ++ y = index % type->e.numeric.dimy; ++ return y * type->e.numeric.dimx + x; ++ ++ case HLSL_CLASS_ARRAY: ++ element_comp_count = hlsl_type_component_count(type->e.array.type); ++ element = index / element_comp_count; ++ base = element * element_comp_count; ++ return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); ++ ++ case HLSL_CLASS_STRUCT: ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ struct hlsl_type *field_type = type->e.record.fields[i].type; ++ ++ element_comp_count = hlsl_type_component_count(field_type); ++ if (index - base < element_comp_count) ++ return base + get_component_index_from_default_initializer_index(field_type, index - base); ++ base += element_comp_count; ++ } ++ break; ++ ++ default: ++ return index; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) ++{ ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3D_SRV_DIMENSION_TEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3D_SRV_DIMENSION_TEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3D_SRV_DIMENSION_TEXTURE3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3D_SRV_DIMENSION_TEXTURECUBE; ++ case HLSL_SAMPLER_DIM_1DARRAY: ++ return D3D_SRV_DIMENSION_TEXTURE1DARRAY; ++ case HLSL_SAMPLER_DIM_2DARRAY: ++ return D3D_SRV_DIMENSION_TEXTURE2DARRAY; ++ case HLSL_SAMPLER_DIM_2DMS: ++ return D3D_SRV_DIMENSION_TEXTURE2DMS; ++ case HLSL_SAMPLER_DIM_2DMSARRAY: ++ return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; ++ case HLSL_SAMPLER_DIM_CUBEARRAY: ++ return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; ++ case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ return D3D_SRV_DIMENSION_BUFFER; ++ default: ++ break; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type) ++{ ++ const struct hlsl_type *format = type->e.resource.format; ++ ++ switch (format->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ return D3D_RETURN_TYPE_DOUBLE; ++ ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (format->modifiers & HLSL_MODIFIER_UNORM) ++ return D3D_RETURN_TYPE_UNORM; ++ if (format->modifiers & HLSL_MODIFIER_SNORM) ++ return D3D_RETURN_TYPE_SNORM; ++ return D3D_RETURN_TYPE_FLOAT; ++ ++ case HLSL_TYPE_INT: ++ return D3D_RETURN_TYPE_SINT; ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ return D3D_RETURN_TYPE_UINT; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) ++{ ++ switch (type->class) ++ { ++ case HLSL_CLASS_SAMPLER: ++ return D3D_SIT_SAMPLER; ++ case HLSL_CLASS_TEXTURE: ++ return D3D_SIT_TEXTURE; ++ case HLSL_CLASS_UAV: ++ return D3D_SIT_UAV_RWTYPED; ++ default: ++ break; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) ++{ ++ switch (type->class) ++ { ++ case HLSL_CLASS_MATRIX: ++ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) ++ 
return D3D_SVC_MATRIX_COLUMNS;
++            else
++                return D3D_SVC_MATRIX_ROWS;
++        case HLSL_CLASS_SCALAR:
++            return D3D_SVC_SCALAR;
++        case HLSL_CLASS_VECTOR:
++            return D3D_SVC_VECTOR;
++
++        case HLSL_CLASS_ARRAY:
++        case HLSL_CLASS_DEPTH_STENCIL_STATE:
++        case HLSL_CLASS_DEPTH_STENCIL_VIEW:
++        case HLSL_CLASS_EFFECT_GROUP:
++        case HLSL_CLASS_ERROR:
++        case HLSL_CLASS_STRUCT:
++        case HLSL_CLASS_PASS:
++        case HLSL_CLASS_PIXEL_SHADER:
++        case HLSL_CLASS_RASTERIZER_STATE:
++        case HLSL_CLASS_RENDER_TARGET_VIEW:
++        case HLSL_CLASS_SAMPLER:
++        case HLSL_CLASS_STRING:
++        case HLSL_CLASS_TECHNIQUE:
++        case HLSL_CLASS_TEXTURE:
++        case HLSL_CLASS_UAV:
++        case HLSL_CLASS_VERTEX_SHADER:
++        case HLSL_CLASS_VOID:
++        case HLSL_CLASS_CONSTANT_BUFFER:
++        case HLSL_CLASS_COMPUTE_SHADER:
++        case HLSL_CLASS_DOMAIN_SHADER:
++        case HLSL_CLASS_HULL_SHADER:
++        case HLSL_CLASS_GEOMETRY_SHADER:
++        case HLSL_CLASS_BLEND_STATE:
++        case HLSL_CLASS_STREAM_OUTPUT:
++        case HLSL_CLASS_NULL:
++            break;
++    }
++
++    vkd3d_unreachable();
++}
++
++static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
++{
++    switch (type->e.numeric.type)
++    {
++        case HLSL_TYPE_BOOL:
++            return D3D_SVT_BOOL;
++        case HLSL_TYPE_DOUBLE:
++            return D3D_SVT_DOUBLE;
++        case HLSL_TYPE_FLOAT:
++        case HLSL_TYPE_HALF:
++            return D3D_SVT_FLOAT;
++        case HLSL_TYPE_INT:
++            return D3D_SVT_INT;
++        case HLSL_TYPE_UINT:
++            return D3D_SVT_UINT;
++    }
++
++    vkd3d_unreachable();
++}
++
++static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type)
++{
++    const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
++    const char *name = array_type->name ? array_type->name : "<unnamed>";
++    const struct hlsl_profile_info *profile = ctx->profile;
++    unsigned int array_size = 0;
++    size_t name_offset = 0;
++    size_t i;
++
++    if (type->bytecode_offset)
++        return;
++
++    if (profile->major_version >= 5)
++        name_offset = put_string(buffer, name);
++
++    if (type->class == HLSL_CLASS_ARRAY)
++        array_size = hlsl_get_multiarray_size(type);
++
++    if (array_type->class == HLSL_CLASS_STRUCT)
++    {
++        unsigned int field_count = 0;
++        size_t fields_offset = 0;
++
++        for (i = 0; i < array_type->e.record.field_count; ++i)
++        {
++            struct hlsl_struct_field *field = &array_type->e.record.fields[i];
++
++            if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
++                continue;
++
++            field->name_bytecode_offset = put_string(buffer, field->name);
++            write_sm4_type(ctx, buffer, field->type);
++            ++field_count;
++        }
++
++        fields_offset = bytecode_align(buffer);
++
++        for (i = 0; i < array_type->e.record.field_count; ++i)
++        {
++            struct hlsl_struct_field *field = &array_type->e.record.fields[i];
++
++            if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
++                continue;
++
++            put_u32(buffer, field->name_bytecode_offset);
++            put_u32(buffer, field->type->bytecode_offset);
++            put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float));
++        }
++        type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID));
++        put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type)));
++        put_u32(buffer, vkd3d_make_u32(array_size, field_count));
++        put_u32(buffer, fields_offset);
++    }
++    else
++    {
++        VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC);
++        type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type)));
++        put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx));
++        put_u32(buffer, vkd3d_make_u32(array_size, 0));
++        
put_u32(buffer, 1); ++ } ++ ++ if (profile->major_version >= 5) ++ { ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, name_offset); ++ } ++} ++ ++static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) ++{ ++ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); ++ size_t cbuffers_offset, resources_offset, creator_offset, string_offset; ++ unsigned int cbuffer_count = 0, extern_resources_count, i, j; ++ size_t cbuffer_position, resource_position, creator_position; ++ const struct hlsl_profile_info *profile = ctx->profile; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ struct extern_resource *extern_resources; ++ const struct hlsl_buffer *cbuffer; ++ const struct hlsl_ir_var *var; ++ ++ static const uint16_t target_types[] = ++ { ++ 0xffff, /* PIXEL */ ++ 0xfffe, /* VERTEX */ ++ 0x4753, /* GEOMETRY */ ++ 0x4853, /* HULL */ ++ 0x4453, /* DOMAIN */ ++ 0x4353, /* COMPUTE */ ++ }; ++ ++ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); ++ ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (cbuffer->reg.allocated) ++ ++cbuffer_count; ++ } ++ ++ put_u32(&buffer, cbuffer_count); ++ cbuffer_position = put_u32(&buffer, 0); ++ put_u32(&buffer, extern_resources_count); ++ resource_position = put_u32(&buffer, 0); ++ put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), ++ target_types[profile->type])); ++ put_u32(&buffer, 0); /* FIXME: compilation flags */ ++ creator_position = put_u32(&buffer, 0); ++ ++ if (profile->major_version >= 5) ++ { ++ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); ++ put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ ++ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ ++ put_u32(&buffer, binding_desc_size); /* size of binding desc */ ++ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ ++ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ ++ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ ++ put_u32(&buffer, 0); /* unknown; possibly a null terminator */ ++ } ++ ++ /* Bound resources. */ ++ ++ resources_offset = bytecode_align(&buffer); ++ set_u32(&buffer, resource_position, resources_offset); ++ ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ const struct extern_resource *resource = &extern_resources[i]; ++ uint32_t flags = 0; ++ ++ if (resource->is_user_packed) ++ flags |= D3D_SIF_USERPACKED; ++ ++ put_u32(&buffer, 0); /* name */ ++ if (resource->buffer) ++ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); ++ else ++ put_u32(&buffer, sm4_resource_type(resource->component_type)); ++ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) ++ { ++ unsigned int dimx = resource->component_type->e.resource.format->e.numeric.dimx; ++ ++ put_u32(&buffer, sm4_data_type(resource->component_type)); ++ put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); ++ put_u32(&buffer, ~0u); /* FIXME: multisample count */ ++ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; ++ } ++ else ++ { ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ } ++ put_u32(&buffer, resource->index); ++ put_u32(&buffer, resource->bind_count); ++ put_u32(&buffer, flags); ++ ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ put_u32(&buffer, resource->space); ++ put_u32(&buffer, resource->id); ++ } ++ } ++ ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ const struct extern_resource *resource = &extern_resources[i]; ++ ++ string_offset = put_string(&buffer, resource->name); ++ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); ++ } ++ ++ /* Buffers. */ ++ ++ cbuffers_offset = bytecode_align(&buffer); ++ set_u32(&buffer, cbuffer_position, cbuffers_offset); ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ unsigned int var_count = 0; ++ ++ if (!cbuffer->reg.allocated) ++ continue; + +- hlsl_block_init(&block); ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) ++ ++var_count; ++ } + +- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if ((var->is_input_semantic && var->last_read) +- || (var->is_output_semantic && var->first_write)) +- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ put_u32(&buffer, 0); /* name */ ++ put_u32(&buffer, var_count); ++ put_u32(&buffer, 0); /* variable offset */ ++ put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); ++ put_u32(&buffer, 0); /* FIXME: flags */ ++ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); + } + +- if (temp_count) +- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ i = 0; ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (!cbuffer->reg.allocated) ++ continue; + +- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ string_offset = put_string(&buffer, cbuffer->name); ++ set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); ++ } ++ ++ i = 0; ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { +- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ size_t vars_start = bytecode_align(&buffer); ++ ++ if (!cbuffer->reg.allocated) ++ continue; ++ ++ set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) ++ uint32_t flags = 0; ++ ++ if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + continue; +- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) ++ ++ if (var->is_read) ++ flags |= D3D_SVF_USED; ++ ++ put_u32(&buffer, 0); /* name */ ++ put_u32(&buffer, var->buffer_offset * sizeof(float)); ++ put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); ++ put_u32(&buffer, flags); ++ put_u32(&buffer, 0); /* type */ ++ put_u32(&buffer, 0); /* default value */ ++ ++ if (profile->major_version >= 5) ++ { ++ put_u32(&buffer, 0); /* texture start */ ++ put_u32(&buffer, 0); /* texture count */ ++ put_u32(&buffer, 0); /* sampler start */ ++ put_u32(&buffer, 0); /* sampler count */ ++ } ++ } ++ ++ j = 0; ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); ++ size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); ++ ++ if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + continue; + +- if (var->indexable) ++ string_offset = put_string(&buffer, var->name); ++ set_u32(&buffer, var_offset, string_offset); ++ write_sm4_type(ctx, &buffer, var->data_type); ++ set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); ++ ++ if (var->default_values) + { +- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; +- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; ++ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int comp_count = hlsl_type_component_count(var->data_type); ++ unsigned int default_value_offset; ++ unsigned int k; + +- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); ++ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); ++ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); ++ ++ for (k = 0; k < comp_count; ++k) ++ { ++ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ unsigned int comp_offset, comp_index; ++ enum hlsl_regset regset; ++ ++ if (comp_type->class == HLSL_CLASS_STRING) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Cannot write string default value."); ++ continue; ++ } ++ ++ comp_index = get_component_index_from_default_initializer_index(var->data_type, k); ++ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) ++ hlsl_fixme(ctx, &var->loc, "Write double default values."); ++ ++ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), ++ var->default_values[k].number.u); ++ } ++ } + } ++ ++ ++j; + } + } + +- list_move_head(&func->body.instrs, &block.instrs); ++ creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); ++ set_u32(&buffer, creator_position, creator_offset); + +- hlsl_block_cleanup(&block); ++ sm4_free_extern_resources(extern_resources, extern_resources_count); + +- sm4_generate_vsir_block(ctx, &func->body, program); ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ return; ++ } ++ rdef->code = buffer.data; ++ rdef->size = buffer.size; } --static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -- struct hlsl_block **found_block) +-/* OBJECTIVE: Translate all the information from ctx and entry_func to the +- * vsir_program, so it can be used as input to tpf_compile() without relying +- * on ctx and entry_func. 
*/ +-static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +- uint64_t config_flags, struct vsir_program *program) +static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) { -- struct hlsl_ir_node *node; +- struct vkd3d_shader_version version = {0}; + struct hlsl_ir_node *const_node, *store; -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) +- version.major = ctx->profile->major_version; +- version.minor = ctx->profile->minor_version; +- version.type = ctx->profile->type; + if (!(const_node = hlsl_new_bool_constant(ctx, val, loc))) + return false; + hlsl_block_add_instr(block, const_node); @@ -6531,45 +9972,36 @@ index d11ff481f6b..a43ea53089e 100644 + struct hlsl_ir_var *var; + struct hlsl_block draft; + struct hlsl_ir_if *iff; -+ + +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + if (node->type == HLSL_IR_IF) { -- if (node == stop_point) -- return NULL; +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; + iff = hlsl_ir_if(node); + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued)) + return true; + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued)) + return true; + return false; -+ } + } -- if (node->type == HLSL_IR_IF) -- { -- struct hlsl_ir_if *iff = hlsl_ir_if(node); -- struct hlsl_ir_jump *jump = NULL; +- generate_vsir_signature(ctx, program, func); +- if (version.type == VKD3D_SHADER_TYPE_HULL) +- generate_vsir_signature(ctx, program, ctx->patch_constant_func); +- +- if (version.type == VKD3D_SHADER_TYPE_COMPUTE) + if (node->type == HLSL_IR_JUMP) -+ { + { +- program->thread_group_size.x = ctx->thread_count[0]; +- program->thread_group_size.y = ctx->thread_count[1]; +- program->thread_group_size.z = ctx->thread_count[2]; + jump = hlsl_ir_jump(node); + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK) + return false; - -- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -- return jump; -- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -- return jump; -- } -- else if (node->type == HLSL_IR_JUMP) -- { -- struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ + hlsl_block_init(&draft); - -- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) -- { -- *found_block = block; -- return jump; -- } -- } ++ + if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + var = loop_continued; + else @@ -6585,17 +10017,23 @@ index d11ff481f6b..a43ea53089e 100644 + return true; } -- return NULL; +- sm4_generate_vsir_add_function(ctx, func, config_flags, program); +- if (version.type == VKD3D_SHADER_TYPE_HULL) +- sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + return false; -+} -+ + } + +-static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, +- struct hlsl_block **found_block) +static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, + struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) -+{ + { +- struct hlsl_ir_node *node; + struct hlsl_ir_node *cond, *iff; + struct hlsl_block then_block; + struct hlsl_ir_load *load; -+ + +- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + hlsl_block_init(&then_block); 
+ + if (!(load = hlsl_new_var_load(ctx, var, loc))) @@ -6619,16 +10057,36 @@ index d11ff481f6b..a43ea53089e 100644 + struct hlsl_ir_node *node, *next; + + LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry) -+ { + { +- if (node == stop_point) +- return NULL; + struct hlsl_ir_if *broken_check, *continued_check; + struct hlsl_block draft; -+ + +- if (node->type == HLSL_IR_IF) +- { +- struct hlsl_ir_if *iff = hlsl_ir_if(node); +- struct hlsl_ir_jump *jump = NULL; + if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued)) + continue; -+ + +- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) +- return jump; +- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) +- return jump; +- } +- else if (node->type == HLSL_IR_JUMP) +- { +- struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + if (&next->entry == &block->instrs) + return true; -+ + +- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) +- { +- *found_block = block; +- return jump; +- } +- } + hlsl_block_init(&draft); + + broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc); @@ -6640,8 +10098,9 @@ index d11ff481f6b..a43ea53089e 100644 + list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs)); + + return true; -+ } -+ + } + +- return NULL; + return false; +} + @@ -6652,7 +10111,7 @@ index d11ff481f6b..a43ea53089e 100644 } static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -@@ -8696,7 +10516,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru +@@ -8696,7 +11290,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return loop->unroll_limit; /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ @@ -6661,7 +10120,7 @@ index d11ff481f6b..a43ea53089e 100644 return 1024; /* SM4 limits implicit unrolling to 254 iterations. 
*/ -@@ -8707,167 +10527,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru +@@ -8707,167 +11301,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return 1024; } @@ -7042,8 +10501,19 @@ index d11ff481f6b..a43ea53089e 100644 } static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -@@ -9116,7 +11048,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -9107,16 +11813,17 @@ static void process_entry_function(struct hlsl_ctx *ctx, + append_output_var_copy(ctx, entry_func, entry_func->return_var); + } + +- if (profile->major_version >= 4) ++ if (hlsl_version_ge(ctx, 4, 0)) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } + else + { hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); ++ hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); } - transform_unroll_loops(ctx, body); @@ -7051,7 +10521,7 @@ index d11ff481f6b..a43ea53089e 100644 hlsl_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); -@@ -9126,9 +11058,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -9126,9 +11833,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); @@ -7066,7 +10536,7 @@ index d11ff481f6b..a43ea53089e 100644 hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); do -@@ -9136,7 +11072,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, +@@ -9136,7 +11847,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, while (hlsl_transform_ir(ctx, dce, body, NULL)); hlsl_transform_ir(ctx, track_components_usage, body, NULL); @@ -7078,7 +10548,7 @@ index d11ff481f6b..a43ea53089e 100644 if (profile->major_version < 4) { -@@ -9241,14 +11180,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -9241,14 +11955,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry mark_indexable_vars(ctx, entry_func); allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); @@ -7096,7 +10566,7 @@ index d11ff481f6b..a43ea53089e 100644 if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx); -@@ -9265,7 +11206,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -9265,7 +11981,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry struct vsir_program program; int result; @@ -7109,7 +10579,7 @@ index d11ff481f6b..a43ea53089e 100644 if (ctx->result) { vsir_program_cleanup(&program); -@@ -9282,18 +11227,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -9282,18 +12002,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry case VKD3D_SHADER_TARGET_DXBC_TPF: { uint32_t config_flags = vkd3d_shader_init_config_flags(); @@ -7137,21 +10607,436 @@ index d11ff481f6b..a43ea53089e 100644 } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 716adb15f08..cd7cd2fe6a3 100644 +index 716adb15f08..e8dd4d62ae2 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -220,7 +220,9 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, +@@ -30,7 +30,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == 
src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -121,7 +121,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -143,20 +143,20 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { + unsigned int k; +- uint32_t u; +- int32_t i; +- double d; +- float f; ++ uint32_t u = 0; ++ double d = 0.0; ++ float f = 0.0f; ++ int32_t i = 0; + +- if (dst_type->dimx != src->node.data_type->dimx +- || dst_type->dimy != src->node.data_type->dimy) ++ if (dst_type->e.numeric.dimx != src->node.data_type->e.numeric.dimx ++ || dst_type->e.numeric.dimy != src->node.data_type->e.numeric.dimy) + { + FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), + debug_hlsl_type(ctx, dst_type)); + return false; + } + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (src->node.data_type->e.numeric.type) + { +@@ -195,9 +195,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + f = !!src->value.u[k].u; + d = !!src->value.u[k].u; + break; +- +- default: +- vkd3d_unreachable(); + } + + switch (dst_type->e.numeric.type) +@@ -220,9 +217,8 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, break; case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ +- default: +- vkd3d_unreachable(); + dst->u[k].u = u ? ~0u : 0u; + break; -+ - default: - vkd3d_unreachable(); } -@@ -1544,6 +1546,149 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + } + return true; +@@ -236,7 +232,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -262,7 +258,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -288,7 +284,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -315,7 +311,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -341,7 +337,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -386,7 +382,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 
0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -420,7 +416,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -444,7 +440,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -489,7 +485,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -529,7 +525,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -555,7 +551,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -601,7 +597,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -638,7 +634,7 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -665,7 +661,7 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -692,7 +688,7 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -717,10 +713,10 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); +- VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx); + + dst->u[0].f = 0.0f; +- for (k = 0; k < src1->node.data_type->dimx; ++k) ++ for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -746,11 +742,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + VKD3D_ASSERT(type == 
src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); +- VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); +- VKD3D_ASSERT(src3->node.data_type->dimx == 1); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx); ++ VKD3D_ASSERT(src3->node.data_type->e.numeric.dimx == 1); + + dst->u[0].f = src3->value.u[0].f; +- for (k = 0; k < src1->node.data_type->dimx; ++k) ++ for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -777,7 +773,7 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -844,7 +840,7 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (src1->node.data_type->e.numeric.type) + { +@@ -862,9 +858,6 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; + break; +- +- default: +- vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; +@@ -880,7 +873,7 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (src1->node.data_type->e.numeric.type) + { +@@ -901,9 +894,6 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; + break; +- +- default: +- vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; +@@ -919,7 +909,7 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (src1->node.data_type->e.numeric.type) + { +@@ -940,9 +930,6 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; + break; +- +- default: +- vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; +@@ -958,16 +945,13 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + unsigned int shift = src2->value.u[k].u % 32; + + switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_INT: +- dst->u[k].i = src1->value.u[k].i << shift; +- break; +- + case HLSL_TYPE_UINT: + dst->u[k].u = src1->value.u[k].u << shift; + 
break; +@@ -989,7 +973,7 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -1027,7 +1011,7 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -1066,7 +1050,7 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -1108,7 +1092,7 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { +@@ -1142,7 +1126,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (src1->node.data_type->e.numeric.type) + { +@@ -1160,9 +1144,6 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; + break; +- +- default: +- vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; +@@ -1179,7 +1160,7 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; + + return true; +@@ -1193,7 +1174,7 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + +- for (k = 0; k < dst_type->dimx; ++k) ++ for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + unsigned int shift = src2->value.u[k].u % 32; + +@@ -1401,7 +1382,7 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) + struct hlsl_type *data_type = const_arg->node.data_type; + unsigned int k; + +- for (k = 0; k < data_type->dimx; ++k) ++ for (k = 0; k < data_type->e.numeric.dimx; ++k) + { + switch (data_type->e.numeric.type) + { +@@ -1422,9 +1403,6 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) + if (const_arg->value.u[k].u != 0) + return false; + break; +- +- default: +- return false; + } + } + return true; +@@ -1435,7 +1413,7 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) + struct hlsl_type *data_type = const_arg->node.data_type; + unsigned int k; + +- for (k = 0; k < data_type->dimx; ++k) ++ for (k = 0; k < data_type->e.numeric.dimx; ++k) + { + switch (data_type->e.numeric.type) + { +@@ -1460,9 +1438,6 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) + if (const_arg->value.u[k].u != ~0) + return false; + break; +- +- default: +- return false; + } + } + return true; +@@ -1544,6 +1519,250 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in return false; } @@ -7198,9 +11083,71 @@ index 716adb15f08..cd7cd2fe6a3 100644 + } +} + ++/* Returns true iff x OPL (y OPR z) = (x OPL y) OPR (x OPL z). */ ++static bool is_op_left_distributive(enum hlsl_ir_expr_op opl, enum hlsl_ir_expr_op opr, enum hlsl_base_type type) ++{ ++ switch (opl) ++ { ++ case HLSL_OP2_BIT_AND: ++ return opr == HLSL_OP2_BIT_OR || opr == HLSL_OP2_BIT_XOR; ++ ++ case HLSL_OP2_BIT_OR: ++ return opr == HLSL_OP2_BIT_AND; ++ ++ case HLSL_OP2_DOT: ++ case HLSL_OP2_MUL: ++ return opr == HLSL_OP2_ADD && (type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT); ++ ++ case HLSL_OP2_MAX: ++ return opr == HLSL_OP2_MIN; ++ ++ case HLSL_OP2_MIN: ++ return opr == HLSL_OP2_MAX; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Attempt to collect together the expression (x OPL a) OPR (x OPL b) -> x OPL (a OPR b). 
*/ ++static struct hlsl_ir_node *collect_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++ enum hlsl_ir_expr_op opr, struct hlsl_ir_node *node1, struct hlsl_ir_node *node2) ++{ ++ enum hlsl_base_type type = instr->data_type->e.numeric.type; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *ab, *res; ++ struct hlsl_ir_expr *e1, *e2; ++ enum hlsl_ir_expr_op opl; ++ ++ if (!node1 || !node2 || node1->type != HLSL_IR_EXPR || node2->type != HLSL_IR_EXPR) ++ return NULL; ++ e1 = hlsl_ir_expr(node1); ++ e2 = hlsl_ir_expr(node2); ++ opl = e1->op; ++ ++ if (e2->op != opl || !is_op_left_distributive(opl, opr, type)) ++ return NULL; ++ if (e1->operands[0].node != e2->operands[0].node) ++ return NULL; ++ if (e1->operands[1].node->type != HLSL_IR_CONSTANT || e2->operands[1].node->type != HLSL_IR_CONSTANT) ++ return NULL; ++ ++ if (!(ab = hlsl_new_binary_expr(ctx, opr, e1->operands[1].node, e2->operands[1].node))) ++ return NULL; ++ list_add_before(&instr->entry, &ab->entry); ++ ++ operands[0] = e1->operands[0].node; ++ operands[1] = ab; ++ ++ if (!(res = hlsl_new_expr(ctx, opl, operands, instr->data_type, &instr->loc))) ++ return NULL; ++ list_add_before(&instr->entry, &res->entry); ++ return res; ++} ++ +bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ -+ struct hlsl_ir_node *arg1 , *arg2; ++ struct hlsl_ir_node *arg1, *arg2, *tmp; + struct hlsl_ir_expr *expr; + enum hlsl_base_type type; + enum hlsl_ir_expr_op op; @@ -7221,11 +11168,17 @@ index 716adb15f08..cd7cd2fe6a3 100644 + if (!arg1 || !arg2) + return false; + ++ if ((tmp = collect_exprs(ctx, instr, op, arg1, arg2))) ++ { ++ /* (x OPL a) OPR (x OPL b) -> x OPL (a OPR b) */ ++ hlsl_replace_node(instr, tmp); ++ return true; ++ } ++ + if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) + { + /* a OP x -> x OP a */ -+ struct hlsl_ir_node *tmp = arg1; -+ ++ tmp = arg1; + arg1 = arg2; + arg2 = tmp; + progress = true; @@ -7282,6 +11235,39 @@ index 716adb15f08..cd7cd2fe6a3 100644 + progress = true; + } + ++ if (!progress && e1 && (tmp = collect_exprs(ctx, instr, op, e1->operands[1].node, arg2))) ++ { ++ /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */ ++ arg1 = e1->operands[0].node; ++ arg2 = tmp; ++ progress = true; ++ } ++ ++ if (!progress && is_op_commutative(op) && e1 ++ && (tmp = collect_exprs(ctx, instr, op, e1->operands[0].node, arg2))) ++ { ++ /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */ ++ arg1 = tmp; ++ arg2 = e1->operands[1].node; ++ progress = true; ++ } ++ ++ if (!progress && e2 && (tmp = collect_exprs(ctx, instr, op, arg1, e2->operands[0].node))) ++ { ++ /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */ ++ arg1 = tmp; ++ arg2 = e2->operands[1].node; ++ progress = true; ++ } ++ ++ if (!progress && is_op_commutative(op) && e2 ++ && (tmp = collect_exprs(ctx, instr, op, arg1, e2->operands[1].node))) ++ { ++ /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */ ++ arg1 = tmp; ++ arg2 = e2->operands[0].node; ++ progress = true; ++ } + } + + if (progress) @@ -7301,17 +11287,19 @@ index 716adb15f08..cd7cd2fe6a3 100644 bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_constant_value value; -@@ -1560,7 +1705,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -1559,8 +1778,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst 
+ return false; src = hlsl_ir_constant(swizzle->val.node); - for (i = 0; i < swizzle->node.data_type->dimx; ++i) +- for (i = 0; i < swizzle->node.data_type->dimx; ++i) - value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; ++ for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) + value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)]; if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) return false; diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b0e89bededb..cdc0c18466f 100644 +index b0e89bededb..3678ad0bacf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -1,5 +1,6 @@ @@ -7396,7 +11384,104 @@ index b0e89bededb..cdc0c18466f 100644 static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -@@ -709,6 +750,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, +@@ -662,7 +703,56 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog + return VKD3D_OK; + } + +-static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex) ++static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, ++ struct vkd3d_shader_instruction *tex, unsigned int *tmp_idx) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ struct vkd3d_shader_location *location = &tex->location; ++ struct vkd3d_shader_instruction *div_ins, *tex_ins; ++ size_t pos = tex - instructions->elements; ++ unsigned int w_comp; ++ ++ w_comp = vsir_swizzle_get_component(tex->src[0].swizzle, 3); ++ ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (*tmp_idx == ~0u) ++ *tmp_idx = program->temp_count++; ++ ++ div_ins = &instructions->elements[pos + 1]; ++ tex_ins = &instructions->elements[pos + 2]; ++ ++ if (!vsir_instruction_init_with_params(program, div_ins, location, VKD3DSIH_DIV, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_dst_param_init(&div_ins->dst[0], VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ div_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ div_ins->dst[0].reg.idx[0].offset = *tmp_idx; ++ div_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; ++ ++ div_ins->src[0] = tex->src[0]; ++ ++ div_ins->src[1] = tex->src[0]; ++ div_ins->src[1].swizzle = vkd3d_shader_create_swizzle(w_comp, w_comp, w_comp, w_comp); ++ ++ if (!vsir_instruction_init_with_params(program, tex_ins, location, VKD3DSIH_TEX, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ tex_ins->dst[0] = tex->dst[0]; ++ ++ tex_ins->src[0].reg = div_ins->dst[0].reg; ++ tex_ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ tex_ins->src[1] = tex->src[1]; ++ ++ vkd3d_shader_instruction_make_nop(tex); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, ++ struct vkd3d_shader_instruction *tex, struct vkd3d_shader_message_context *message_context) + { + unsigned int idx = tex->src[1].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs; +@@ -670,16 +760,34 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, st + VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); + VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr); + +- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) ++ if (!(srcs = 
shader_src_param_allocator_get(&program->instructions.src_params, 4))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + srcs[0] = tex->src[0]; + vsir_src_param_init_resource(&srcs[1], idx, idx); + vsir_src_param_init_sampler(&srcs[2], idx, idx); + +- tex->opcode = VKD3DSIH_SAMPLE; +- tex->src = srcs; +- tex->src_count = 3; ++ if (!tex->flags) ++ { ++ tex->opcode = VKD3DSIH_SAMPLE; ++ tex->src = srcs; ++ tex->src_count = 3; ++ } ++ else if (tex->flags == VKD3DSI_TEXLD_BIAS) ++ { ++ tex->opcode = VKD3DSIH_SAMPLE_B; ++ tex->src = srcs; ++ tex->src_count = 4; ++ ++ srcs[3] = tex->src[0]; ++ srcs[3].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ } ++ else ++ { ++ vkd3d_shader_error(message_context, &tex->location, ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unhandled tex flags %#x.", tex->flags); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } + + return VKD3D_OK; + } +@@ -709,6 +817,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, return VKD3D_OK; } @@ -7473,7 +11558,7 @@ index b0e89bededb..cdc0c18466f 100644 static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, struct vsir_transformation_context *ctx) { -@@ -743,11 +854,31 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr +@@ -743,19 +921,47 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr case VKD3DSIH_DCL_GLOBAL_FLAGS: case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: @@ -7505,7 +11590,25 @@ index b0e89bededb..cdc0c18466f 100644 case VKD3DSIH_SINCOS: if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) return ret; -@@ -847,11 +978,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, + break; + + case VKD3DSIH_TEX: +- if ((ret = vsir_program_lower_tex(program, ins)) < 0) +- return ret; ++ if (ins->flags == VKD3DSI_TEXLD_PROJECT) ++ { ++ if ((ret = vsir_program_lower_texldp(program, ins, &tmp_idx)) < 0) ++ return ret; ++ } ++ else ++ { ++ if ((ret = vsir_program_lower_tex(program, ins, message_context)) < 0) ++ return ret; ++ } + break; + + case VKD3DSIH_TEXLDD: +@@ -847,11 +1053,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, return VKD3D_OK; } @@ -7543,7 +11646,7 @@ index b0e89bededb..cdc0c18466f 100644 if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) return VKD3D_OK; -@@ -864,22 +1020,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr +@@ -864,22 +1095,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr return VKD3D_OK; } @@ -7567,7 +11670,15 @@ index b0e89bededb..cdc0c18466f 100644 return VKD3D_OK; } -@@ -1034,6 +1176,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program +@@ -975,6 +1192,7 @@ static void remove_unread_output_components(const struct shader_signature *signa + switch (dst->reg.type) + { + case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_TEXCRDOUT: + e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); + break; + +@@ -1034,6 +1252,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program e->target_location = map->input_register_index; @@ -7577,7 +11688,7 @@ index b0e89bededb..cdc0c18466f 100644 if ((input_mask & e->mask) == input_mask) { ++subset_varying_count; -@@ -1054,6 +1199,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program +@@ -1054,6 +1275,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program } 
else { @@ -7586,7 +11697,7 @@ index b0e89bededb..cdc0c18466f 100644 e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; } -@@ -1213,12 +1360,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal +@@ -1213,12 +1436,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal vkd3d_shader_instruction_make_nop(ins); return; } @@ -7599,7 +11710,7 @@ index b0e89bededb..cdc0c18466f 100644 if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins)) return; -@@ -1369,25 +1510,15 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param +@@ -1369,25 +1586,15 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param } } @@ -7627,7 +11738,7 @@ index b0e89bededb..cdc0c18466f 100644 count += !!s->elements[i].used_mask; if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) -@@ -1399,7 +1530,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p +@@ -1399,7 +1606,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p ins = &normaliser->instructions.elements[dst]; vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); @@ -7636,7 +11747,7 @@ index b0e89bededb..cdc0c18466f 100644 ++ins; for (i = 0; i < s->element_count; ++i) -@@ -1408,26 +1539,35 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p +@@ -1408,26 +1615,35 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p if (!e->used_mask) continue; @@ -7685,7 +11796,7 @@ index b0e89bededb..cdc0c18466f 100644 return VKD3D_OK; } -@@ -1442,7 +1582,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i +@@ -1442,7 +1658,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j; @@ -7694,7 +11805,7 @@ index b0e89bededb..cdc0c18466f 100644 if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) { -@@ -1545,11 +1685,6 @@ static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *n +@@ -1545,11 +1761,6 @@ static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *n return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; } @@ -7706,7 +11817,17 @@ index b0e89bededb..cdc0c18466f 100644 static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) { -@@ -1920,41 +2055,26 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh +@@ -1820,7 +2031,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map + element_count = s->element_count; + if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) + return false; +- memcpy(elements, s->elements, element_count * sizeof(*elements)); ++ if (element_count) ++ memcpy(elements, s->elements, element_count * sizeof(*elements)); + + for (i = 0; i < element_count; ++i) + elements[i].sort_index = i; +@@ -1920,41 +2132,26 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh { VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); @@ -7760,7 +11881,7 @@ index b0e89bededb..cdc0c18466f 100644 const struct shader_signature *signature; const struct signature_element *e; -@@ -1970,26 +2090,22 @@ static bool shader_dst_param_io_normalise(struct 
vkd3d_shader_dst_param *dst_par +@@ -1970,26 +2167,23 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par /* Convert patch constant outputs to the patch constant register type to avoid the need * to convert compiler symbols when accessed as inputs in a later stage. */ reg->type = VKD3DSPR_PATCHCONST; @@ -7779,6 +11900,7 @@ index b0e89bededb..cdc0c18466f 100644 - dcl_params = normaliser->pc_dcl_params; break; ++ case VKD3DSPR_TEXCRDOUT: case VKD3DSPR_COLOROUT: reg_idx = reg->idx[0].offset; signature = normaliser->output_signature; @@ -7787,7 +11909,7 @@ index b0e89bededb..cdc0c18466f 100644 break; case VKD3DSPR_INCONTROLPOINT: -@@ -1997,14 +2113,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par +@@ -1997,14 +2191,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par reg_idx = reg->idx[reg->idx_count - 1].offset; signature = normaliser->input_signature; reg->type = VKD3DSPR_INPUT; @@ -7802,7 +11924,7 @@ index b0e89bededb..cdc0c18466f 100644 break; case VKD3DSPR_RASTOUT: -@@ -2014,7 +2128,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par +@@ -2014,7 +2206,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; @@ -7810,7 +11932,7 @@ index b0e89bededb..cdc0c18466f 100644 /* Fog and point size are scalar, but fxc/d3dcompiler emits a full * write mask when writing to them. */ if (reg->idx[0].offset > 0) -@@ -2030,54 +2143,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par +@@ -2030,54 +2221,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par vkd3d_unreachable(); e = &signature->elements[element_idx]; @@ -7866,7 +11988,7 @@ index b0e89bededb..cdc0c18466f 100644 /* Replace the register index with the signature element index */ reg->idx[id_idx].offset = element_idx; -@@ -2129,6 +2196,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par +@@ -2129,6 +2274,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par case VKD3DSPR_OUTCONTROLPOINT: reg->type = VKD3DSPR_OUTPUT; @@ -7875,7 +11997,16 @@ index b0e89bededb..cdc0c18466f 100644 /* fall through */ case VKD3DSPR_OUTPUT: reg_idx = reg->idx[reg->idx_count - 1].offset; -@@ -2169,40 +2238,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par +@@ -2136,8 +2283,6 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par + break; + + case VKD3DSPR_TEXTURE: +- if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL) +- return; + reg->type = VKD3DSPR_INPUT; + reg_idx = reg->idx[0].offset; + signature = normaliser->input_signature; +@@ -2169,40 +2314,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, struct io_normaliser *normaliser) { @@ -7916,7 +12047,7 @@ index b0e89bededb..cdc0c18466f 100644 case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: -@@ -2215,7 +2254,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi +@@ -2215,7 +2330,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi if (vsir_instruction_is_dcl(ins)) break; for (i = 0; i < 
ins->dst_count; ++i) @@ -7925,7 +12056,7 @@ index b0e89bededb..cdc0c18466f 100644 for (i = 0; i < ins->src_count; ++i) shader_src_param_io_normalise(&ins->src[i], normaliser); break; -@@ -2275,7 +2314,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -2275,7 +2390,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program program->instructions = normaliser.instructions; program->use_vocp = normaliser.use_vocp; @@ -7934,7 +12065,56 @@ index b0e89bededb..cdc0c18466f 100644 return VKD3D_OK; } -@@ -6634,149 +6673,747 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr +@@ -2299,16 +2414,12 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * + { + enum vkd3d_shader_register_type type; + enum vkd3d_shader_d3dbc_constant_register set; +- uint32_t offset; + } + regs[] = + { +- {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, +- {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, +- {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, +- {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, +- {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, +- {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, ++ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER}, ++ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER}, ++ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER}, + }; + + unsigned int i; +@@ -2324,7 +2435,7 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * + } + + *set = regs[i].set; +- *index = regs[i].offset + reg->idx[0].offset; ++ *index = reg->idx[0].offset; + return true; + } + } +@@ -3726,7 +3837,8 @@ static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_ + sizeof(*list->structures))) + return VKD3D_ERROR_OUT_OF_MEMORY; + +- memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); ++ if (size) ++ memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); + + list->count += size; + +@@ -4663,7 +4775,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ + } + } + +- qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); ++ if (cfg->loop_intervals) ++ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + + if (TRACE_ON()) + for (i = 0; i < cfg->loop_interval_count; ++i) +@@ -6634,52 +6747,486 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr return VKD3D_OK; } @@ -7951,16 +12131,14 @@ index b0e89bededb..cdc0c18466f 100644 - bool dcl_temps_found; - enum vkd3d_shader_opcode phase; - bool inside_block; -+ struct shader_signature *signature = &program->input_signature; -+ uint32_t register_idx = 0; - +- - struct validation_context_temp_data - { - enum vsir_dimension dimension; - size_t first_seen; - } *temps; -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; ++ struct shader_signature *signature = &program->input_signature; ++ uint32_t register_idx = 0; - struct validation_context_ssa_data - { @@ -7971,44 +12149,42 @@ index b0e89bededb..cdc0c18466f 100644 - uint32_t read_mask; - size_t first_assigned; - } *ssas; -+ if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) ++ if (program->shader_version.type != 
VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; - enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -}; -+ /* We could check the value and skip this if NONE, but chances are if a -+ * user specifies the fog fragment mode as a parameter, they'll want to -+ * enable it dynamically. Always specifying it (and hence always outputting -+ * it from the VS) avoids an extra VS variant. */ ++ if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) ++ return VKD3D_OK; -static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, - enum vkd3d_shader_error error, const char *format, ...) -{ - struct vkd3d_string_buffer buf; - va_list args; -+ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) -+ return VKD3D_OK; ++ /* We could check the value and skip this if NONE, but chances are if a ++ * user specifies the fog fragment mode as a parameter, they'll want to ++ * enable it dynamically. Always specifying it (and hence always outputting ++ * it from the VS) avoids an extra VS variant. */ - vkd3d_string_buffer_init(&buf); -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ register_idx = max(register_idx, signature->elements[i].register_index + 1); ++ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) ++ return VKD3D_OK; - va_start(args, format); - vkd3d_string_buffer_vprintf(&buf, format, args); - va_end(args); ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ register_idx = max(register_idx, signature->elements[i].register_index + 1); + +- if (ctx->invalid_instruction_idx) + if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) + return VKD3D_ERROR_OUT_OF_MEMORY; + return VKD3D_OK; +} - -- if (ctx->invalid_instruction_idx) -- { -- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); -- WARN("VSIR validation error: %s\n", buf.buffer); -- } -- else ++ +static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, + uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, @@ -8022,12 +12198,7 @@ index b0e89bededb..cdc0c18466f 100644 + uint32_t ssa_temp, ssa_temp2; + + switch (mode) - { -- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -- vkd3d_shader_error(ctx->message_context, &ins->location, error, -- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -- WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); -- } ++ { + case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: + /* We generate the following code: + * @@ -8057,8 +12228,7 @@ index b0e89bededb..cdc0c18466f 100644 + src_param_init_ssa_float(&ins->src[0], ssa_temp); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + break; - -- vkd3d_string_buffer_cleanup(&buf); ++ + case VKD3D_SHADER_FOG_FRAGMENT_EXP: + /* We generate the following code: + * @@ -8068,27 +12238,11 @@ index b0e89bededb..cdc0c18466f 100644 + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 4; - -- if (!ctx->status) -- ctx->status = VKD3D_ERROR_INVALID_SHADER; --} ++ + ssa_temp = program->ssa_count++; - --static void vsir_validate_register_without_indices(struct validation_context *ctx, -- const struct vkd3d_shader_register 
*reg) --{ -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a register of type %#x.", -- reg->idx_count, reg->type); --} ++ + ins = &program->instructions.elements[pos]; - --static void vsir_validate_io_register(struct validation_context *ctx, -- const struct vkd3d_shader_register *reg) --{ -- const struct shader_signature *signature; -- bool has_control_point = false; ++ + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); @@ -8096,29 +12250,14 @@ index b0e89bededb..cdc0c18466f 100644 + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - -- switch (reg->type) -- { -- case VKD3DSPR_INPUT: -- signature = &ctx->program->input_signature; ++ + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp); + ins->src[0].modifiers = VKD3DSPSM_NEG; + break; - -- switch (ctx->program->shader_version.type) -- { -- case VKD3D_SHADER_TYPE_GEOMETRY: -- case VKD3D_SHADER_TYPE_HULL: -- case VKD3D_SHADER_TYPE_DOMAIN: -- has_control_point = true; -- break; -- -- default: -- break; -- } ++ + case VKD3D_SHADER_FOG_FRAGMENT_EXP2: + /* We generate the following code: + * @@ -8153,10 +12292,8 @@ index b0e89bededb..cdc0c18466f 100644 + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp2); + ins->src[0].modifiers = VKD3DSPSM_NEG; - break; - -- case VKD3DSPR_OUTPUT: -- switch (ctx->program->shader_version.type) ++ break; ++ + default: + vkd3d_unreachable(); + } @@ -8256,28 +12393,13 @@ index b0e89bededb..cdc0c18466f 100644 + + /* Note we run after I/O normalization. 
*/ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) - { -- case VKD3D_SHADER_TYPE_HULL: -- if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE -- || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -- { -- signature = &ctx->program->output_signature; -- has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; -- } -- else -- { -- signature = &ctx->program->patch_constant_signature; -- } -- break; ++ { + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = colour_temp; + } + } + } - -- default: -- signature = &ctx->program->output_signature; -- break; ++ + return VKD3D_OK; +} + @@ -8443,27 +12565,15 @@ index b0e89bededb..cdc0c18466f 100644 + { + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = temp; - } -- break; ++ } + } + } - -- case VKD3DSPR_INCONTROLPOINT: -- signature = &ctx->program->input_signature; -- has_control_point = true; -- break; ++ + program->has_fog = true; - -- case VKD3DSPR_OUTCONTROLPOINT: -- signature = &ctx->program->output_signature; -- has_control_point = true; -- break; ++ + return VKD3D_OK; +} - -- case VKD3DSPR_PATCHCONST: -- signature = &ctx->program->patch_constant_signature; -- break; ++ +struct validation_context +{ + struct vkd3d_shader_message_context *message_context; @@ -8527,33 +12637,15 @@ index b0e89bededb..cdc0c18466f 100644 + va_end(args); + + if (ctx->invalid_instruction_idx) -+ { -+ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); -+ WARN("VSIR validation error: %s\n", buf.buffer); -+ } -+ else -+ { -+ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -+ vkd3d_shader_error(ctx->message_context, &ins->location, error, -+ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -+ WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); -+ } -+ -+ vkd3d_string_buffer_cleanup(&buf); -+ -+ if (!ctx->status) -+ ctx->status = VKD3D_ERROR_INVALID_SHADER; -+} -+ -+static void vsir_validate_register_without_indices(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+} -+ + { + vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); + WARN("VSIR validation error: %s\n", buf.buffer); +@@ -6707,76 +7254,240 @@ static void vsir_validate_register_without_indices(struct validation_context *ct + reg->idx_count, reg->type); + } + +-static void vsir_validate_io_register(struct validation_context *ctx, +- const struct vkd3d_shader_register *reg) +enum vsir_signature_type +{ + SIGNATURE_TYPE_INPUT, @@ -8562,7 +12654,7 @@ index b0e89bededb..cdc0c18466f 100644 +}; + +enum vsir_io_reg_type -+{ + { + REG_V, + REG_O, + REG_VPC, @@ -8584,10 +12676,12 @@ index b0e89bededb..cdc0c18466f 100644 +{ + unsigned int flags; + enum vsir_signature_type signature_type; -+ const struct shader_signature *signature; + const struct shader_signature *signature; +- bool has_control_point = false; + unsigned int control_point_count; +}; -+ + +- switch (reg->type) +enum +{ + INPUT_BIT = (1u << 0), @@ -8703,33 +12797,74 @@ index b0e89bededb..cdc0c18466f 100644 + signature_register_data = &vsir_sm4_io_register_data; + + switch (register_type) -+ { + { +- case VKD3DSPR_INPUT: +- signature = 
&ctx->program->input_signature; + case VKD3DSPR_INPUT: io_reg_type = REG_V; break; + case VKD3DSPR_OUTPUT: io_reg_type = REG_O; break; + case VKD3DSPR_INCONTROLPOINT: io_reg_type = REG_VICP; break; + case VKD3DSPR_OUTCONTROLPOINT: io_reg_type = REG_VOCP; break; + case VKD3DSPR_PATCHCONST: io_reg_type = REG_VPC; break; -+ + +- switch (ctx->program->shader_version.type) +- { +- case VKD3D_SHADER_TYPE_GEOMETRY: +- case VKD3D_SHADER_TYPE_HULL: +- case VKD3D_SHADER_TYPE_DOMAIN: +- has_control_point = true; +- break; + default: + return NULL; + } -+ + +- default: +- break; +- } +- break; + switch (ctx->phase) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: phase = PHASE_CONTROL_POINT; break; + case VKD3DSIH_HS_FORK_PHASE: phase = PHASE_FORK; break; + case VKD3DSIH_HS_JOIN_PHASE: phase = PHASE_JOIN; break; + case VKD3DSIH_INVALID: phase = PHASE_NONE; break; -+ + +- case VKD3DSPR_OUTPUT: +- switch (ctx->program->shader_version.type) +- { +- case VKD3D_SHADER_TYPE_HULL: +- if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE +- || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) +- { +- signature = &ctx->program->output_signature; +- has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; +- } +- else +- { +- signature = &ctx->program->patch_constant_signature; +- } +- break; + default: + vkd3d_unreachable(); + } -+ + +- default: +- signature = &ctx->program->output_signature; +- break; +- } +- break; + *data = (*signature_register_data)[ctx->program->shader_version.type][phase][io_reg_type]; -+ + +- case VKD3DSPR_INCONTROLPOINT: +- signature = &ctx->program->input_signature; +- has_control_point = true; +- break; + if (!(data->flags & (INPUT_BIT | OUTPUT_BIT))) + return false; -+ + +- case VKD3DSPR_OUTCONTROLPOINT: +- signature = &ctx->program->output_signature; +- has_control_point = true; +- break; + /* VSIR_NORMALISED_HULL_CONTROL_POINT_IO differs from VSIR_NORMALISED_SM4 + * for just a single flag. So we don't keep a whole copy of it, but just + * patch SM4 when needed. */ @@ -8740,7 +12875,10 @@ index b0e89bededb..cdc0c18466f 100644 + VKD3D_ASSERT(!(data->flags & CONTROL_POINT_BIT)); + data->flags |= CONTROL_POINT_BIT; + } -+ + +- case VKD3DSPR_PATCHCONST: +- signature = &ctx->program->patch_constant_signature; +- break; + switch (data->signature_type) + { + case SIGNATURE_TYPE_INPUT: @@ -8761,7 +12899,8 @@ index b0e89bededb..cdc0c18466f 100644 vkd3d_unreachable(); } +} -+ + +- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) +static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +{ + unsigned int control_point_index, control_point_count; @@ -8779,8 +12918,7 @@ index b0e89bededb..cdc0c18466f 100644 + signature = io_reg_data.signature; + has_control_point = io_reg_data.flags & CONTROL_POINT_BIT; + control_point_count = io_reg_data.control_point_count; - -- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) ++ + if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6) { /* Indices are [register] or [control point, register]. 
Both are @@ -8792,7 +12930,7 @@ index b0e89bededb..cdc0c18466f 100644 if (reg->idx_count != expected_idx_count) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -@@ -6795,7 +7432,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, +@@ -6795,7 +7506,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, /* If the signature element is not an array, indices are * [signature] or [control point, signature]. If the signature * element is an array, indices are [array, signature] or @@ -8801,7 +12939,7 @@ index b0e89bededb..cdc0c18466f 100644 * not allowed to have a relative address, while the others are. */ if (reg->idx_count < 1) -@@ -6829,6 +7466,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, +@@ -6829,6 +7540,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, is_array = true; expected_idx_count = 1 + !!has_control_point + !!is_array; @@ -8809,7 +12947,7 @@ index b0e89bededb..cdc0c18466f 100644 if (reg->idx_count != expected_idx_count) { -@@ -6837,7 +7475,18 @@ static void vsir_validate_io_register(struct validation_context *ctx, +@@ -6837,7 +7549,18 @@ static void vsir_validate_io_register(struct validation_context *ctx, reg->idx_count, reg->type); return; } @@ -8828,7 +12966,7 @@ index b0e89bededb..cdc0c18466f 100644 } static void vsir_validate_temp_register(struct validation_context *ctx, -@@ -7143,8 +7792,26 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -7143,8 +7866,26 @@ static void vsir_validate_register(struct validation_context *ctx, for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) { const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; @@ -8856,7 +12994,7 @@ index b0e89bededb..cdc0c18466f 100644 } switch (reg->type) -@@ -7185,6 +7852,10 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -7185,6 +7926,10 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; @@ -8867,7 +13005,7 @@ index b0e89bededb..cdc0c18466f 100644 case VKD3DSPR_NULL: vsir_validate_register_without_indices(ctx, reg); break; -@@ -7201,6 +7872,18 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -7201,6 +7946,18 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_uav_register(ctx, reg); break; @@ -8886,7 +13024,7 @@ index b0e89bededb..cdc0c18466f 100644 case VKD3DSPR_INCONTROLPOINT: vsir_validate_io_register(ctx, reg); break; -@@ -7213,6 +7896,38 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -7213,6 +7970,38 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_io_register(ctx, reg); break; @@ -8925,7 +13063,7 @@ index b0e89bededb..cdc0c18466f 100644 case VKD3DSPR_DEPTHOUTGE: vsir_validate_register_without_indices(ctx, reg); break; -@@ -7221,15 +7936,37 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -7221,15 +8010,37 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; @@ -8963,7 +13101,7 @@ index b0e89bededb..cdc0c18466f 100644 static void vsir_validate_dst_param(struct validation_context *ctx, const struct vkd3d_shader_dst_param *dst) { -@@ -7304,15 +8041,28 @@ static void vsir_validate_dst_param(struct validation_context *ctx, +@@ -7304,15 +8115,28 @@ static void vsir_validate_dst_param(struct validation_context *ctx, case VKD3DSPR_IMMCONST64: 
case VKD3DSPR_SAMPLER: case VKD3DSPR_RESOURCE: @@ -8996,7 +13134,57 @@ index b0e89bededb..cdc0c18466f 100644 break; default: -@@ -7420,13 +8170,6 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, +@@ -7320,6 +8144,16 @@ static void vsir_validate_dst_param(struct validation_context *ctx, + } + } + ++static void vsir_validate_io_src_param(struct validation_context *ctx, ++ const struct vkd3d_shader_src_param *src) ++{ ++ struct vsir_io_register_data io_reg_data; ++ ++ if (!vsir_get_io_register_data(ctx, src->reg.type, &io_reg_data) || !(io_reg_data.flags & INPUT_BIT)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x used as source parameter.", src->reg.type); ++} ++ + static void vsir_validate_src_param(struct validation_context *ctx, + const struct vkd3d_shader_src_param *src) + { +@@ -7355,18 +8189,24 @@ static void vsir_validate_src_param(struct validation_context *ctx, + "Invalid NULL register used as source parameter."); + break; + ++ case VKD3DSPR_INPUT: ++ vsir_validate_io_src_param(ctx, src); ++ break; ++ + case VKD3DSPR_OUTPUT: +- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL +- || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid OUTPUT register used as source parameter."); ++ vsir_validate_io_src_param(ctx, src); ++ break; ++ ++ case VKD3DSPR_INCONTROLPOINT: ++ vsir_validate_io_src_param(ctx, src); ++ break; ++ ++ case VKD3DSPR_OUTCONTROLPOINT: ++ vsir_validate_io_src_param(ctx, src); + break; + + case VKD3DSPR_PATCHCONST: +- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN +- && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders."); ++ vsir_validate_io_src_param(ctx, src); + break; + + default: +@@ -7420,13 +8260,6 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, return true; } @@ -9010,7 +13198,7 @@ index b0e89bededb..cdc0c18466f 100644 static const char * const signature_type_names[] = { [SIGNATURE_TYPE_INPUT] = "input", -@@ -7466,17 +8209,32 @@ sysval_validation_data[] = +@@ -7466,17 +8299,37 @@ sysval_validation_data[] = }; static void vsir_validate_signature_element(struct validation_context *ctx, @@ -9030,6 +13218,11 @@ index b0e89bededb..cdc0c18466f 100644 validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); ++ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6 && element->register_count != 1) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid register count %u.", idx, signature_type_name, ++ element->register_count); ++ + if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT + || MAX_REG_OUTPUT - element->register_index < element->register_count)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, @@ -9046,7 +13239,7 @@ index b0e89bededb..cdc0c18466f 100644 if (element->mask == 0 || (element->mask & ~0xf)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); -@@ -7486,33 +8244,27 @@ static void vsir_validate_signature_element(struct 
validation_context *ctx, +@@ -7486,33 +8339,27 @@ static void vsir_validate_signature_element(struct validation_context *ctx, "element %u of %s signature: Non-contiguous mask %#x.", idx, signature_type_name, element->mask); @@ -9097,7 +13290,7 @@ index b0e89bededb..cdc0c18466f 100644 case VKD3D_SHADER_SV_POSITION: case VKD3D_SHADER_SV_CLIP_DISTANCE: case VKD3D_SHADER_SV_CULL_DISTANCE: -@@ -7523,18 +8275,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, +@@ -7523,18 +8370,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, case VKD3D_SHADER_SV_INSTANCE_ID: case VKD3D_SHADER_SV_IS_FRONT_FACE: case VKD3D_SHADER_SV_SAMPLE_INDEX: @@ -9147,7 +13340,7 @@ index b0e89bededb..cdc0c18466f 100644 break; default: -@@ -7544,6 +8321,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, +@@ -7544,6 +8416,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, break; } @@ -9191,7 +13384,7 @@ index b0e89bededb..cdc0c18466f 100644 if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) { const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; -@@ -7622,6 +8436,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx, +@@ -7622,6 +8531,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", idx, signature_type_name, element->interpolation_mode); @@ -9223,7 +13416,7 @@ index b0e89bededb..cdc0c18466f 100644 } static const unsigned int allowed_signature_phases[] = -@@ -7631,8 +8470,8 @@ static const unsigned int allowed_signature_phases[] = +@@ -7631,8 +8565,8 @@ static const unsigned int allowed_signature_phases[] = [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, }; @@ -9234,7 +13427,7 @@ index b0e89bededb..cdc0c18466f 100644 { unsigned int i; -@@ -7642,7 +8481,110 @@ static void vsir_validate_signature(struct validation_context *ctx, +@@ -7642,7 +8576,110 @@ static void vsir_validate_signature(struct validation_context *ctx, "Unexpected %s signature.", signature_type_names[signature_type]); for (i = 0; i < signature->element_count; ++i) @@ -9346,7 +13539,7 @@ index b0e89bededb..cdc0c18466f 100644 } static const char *name_from_cf_type(enum vsir_control_flow_type type) -@@ -7754,6 +8696,206 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, +@@ -7754,6 +8791,206 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, instruction->declaration.max_tessellation_factor); } @@ -9553,7 +13746,7 @@ index b0e89bededb..cdc0c18466f 100644 static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { -@@ -7763,6 +8905,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, +@@ -7763,6 +9000,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, instruction->declaration.primitive_type.type); } @@ -9659,7 +13852,7 @@ index b0e89bededb..cdc0c18466f 100644 static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { -@@ -7772,6 +9013,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte +@@ -7772,6 +9108,22 @@ static void 
vsir_validate_dcl_output_control_point_count(struct validation_conte instruction->declaration.count); } @@ -9682,7 +13875,7 @@ index b0e89bededb..cdc0c18466f 100644 static void vsir_validate_dcl_output_topology(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { -@@ -7801,6 +9058,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, +@@ -7801,6 +9153,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); @@ -9694,7 +13887,7 @@ index b0e89bededb..cdc0c18466f 100644 } static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, -@@ -8063,8 +9325,17 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ +@@ -8063,8 +9420,17 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, @@ -9712,7 +13905,7 @@ index b0e89bededb..cdc0c18466f 100644 [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, -@@ -8177,6 +9448,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c +@@ -8177,6 +9543,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, .invalid_instruction_idx = true, @@ -9725,7 +13918,7 @@ index b0e89bededb..cdc0c18466f 100644 }; unsigned int i; -@@ -8187,12 +9464,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c +@@ -8187,12 +9559,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c { case VKD3D_SHADER_TYPE_HULL: case VKD3D_SHADER_TYPE_DOMAIN: @@ -9746,7 +13939,7 @@ index b0e89bededb..cdc0c18466f 100644 } switch (program->shader_version.type) -@@ -8226,9 +9511,47 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c +@@ -8226,9 +9606,47 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c program->output_control_point_count); } @@ -9797,7 +13990,7 @@ index b0e89bededb..cdc0c18466f 100644 if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) goto fail; -@@ -8318,6 +9641,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin +@@ -8318,6 +9736,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin if (program->shader_version.major <= 2) vsir_transform(&ctx, vsir_program_add_diffuse_output); @@ -9810,7 +14003,7 @@ index b0e89bededb..cdc0c18466f 100644 return ctx.result; } -@@ -8372,6 +9701,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t +@@ -8372,6 +9796,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t vsir_transform(&ctx, vsir_program_insert_point_size); vsir_transform(&ctx, vsir_program_insert_point_size_clamp); vsir_transform(&ctx, vsir_program_insert_point_coord); @@ -10460,7 +14653,7 @@ index 
4a8d0fddae1..d167415c356 100644 #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index bdfd632ad12..a7b935543a0 100644 +index bdfd632ad12..efa76983546 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -34,6 +34,32 @@ @@ -11379,6 +15572,30 @@ index bdfd632ad12..a7b935543a0 100644 static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) { +@@ -7105,12 +7259,13 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler + + static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) + { ++ size_t table_count = compiler->offset_info.descriptor_table_count; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t void_id; + + /* If a patch constant function used descriptor indexing the offsets must be reloaded. */ +- memset(compiler->descriptor_offset_ids, 0, compiler->offset_info.descriptor_table_count +- * sizeof(*compiler->descriptor_offset_ids)); ++ if (table_count) ++ memset(compiler->descriptor_offset_ids, 0, table_count * sizeof(*compiler->descriptor_offset_ids)); + vkd3d_spirv_builder_begin_main_function(builder); + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); + +@@ -7147,7 +7302,6 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + {VKD3DSIH_DDIV, SpvOpFDiv}, + {VKD3DSIH_DIV, SpvOpFDiv}, + {VKD3DSIH_DMUL, SpvOpFMul}, +- {VKD3DSIH_DTOF, SpvOpFConvert}, + {VKD3DSIH_DTOI, SpvOpConvertFToS}, + {VKD3DSIH_DTOU, SpvOpConvertFToU}, + {VKD3DSIH_FREM, SpvOpFRem}, @@ -7501,7 +7655,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, general_implementation: @@ -11388,7 +15605,107 @@ index bdfd632ad12..a7b935543a0 100644 write_mask = vsir_write_mask_64_from_32(write_mask); else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) write_mask = vsir_write_mask_32_from_64(write_mask); -@@ -10214,13 +10368,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -7785,6 +7939,7 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, + uint32_t src_type_id, dst_type_id, condition_type_id; + enum vkd3d_shader_component_type component_type; + unsigned int component_count; ++ uint32_t write_mask; + + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count == 1); +@@ -7794,21 +7949,23 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, + * and for NaN to yield zero. 
*/ + + component_count = vsir_write_mask_component_count(dst->write_mask); +- src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask); +- dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); +- src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + + if (src->reg.data_type == VKD3D_DATA_DOUBLE) + { ++ write_mask = vkd3d_write_mask_from_component_count(component_count); + int_min_id = spirv_compiler_get_constant_double_vector(compiler, -2147483648.0, component_count); + float_max_id = spirv_compiler_get_constant_double_vector(compiler, 2147483648.0, component_count); + } + else + { ++ write_mask = dst->write_mask; + int_min_id = spirv_compiler_get_constant_float_vector(compiler, -2147483648.0f, component_count); + float_max_id = spirv_compiler_get_constant_float_vector(compiler, 2147483648.0f, component_count); + } + ++ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); ++ dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); ++ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); + val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, int_min_id); + + /* VSIR allows the destination of a signed conversion to be unsigned. */ +@@ -7838,6 +7995,7 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src_type_id, dst_type_id, condition_type_id; + unsigned int component_count; ++ uint32_t write_mask; + + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count == 1); +@@ -7847,21 +8005,23 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, + * and for NaN to yield zero. */ + + component_count = vsir_write_mask_component_count(dst->write_mask); +- src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask); +- dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); +- src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + + if (src->reg.data_type == VKD3D_DATA_DOUBLE) + { ++ write_mask = vkd3d_write_mask_from_component_count(component_count); + zero_id = spirv_compiler_get_constant_double_vector(compiler, 0.0, component_count); + float_max_id = spirv_compiler_get_constant_double_vector(compiler, 4294967296.0, component_count); + } + else + { ++ write_mask = dst->write_mask; + zero_id = spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count); + float_max_id = spirv_compiler_get_constant_float_vector(compiler, 4294967296.0f, component_count); + } + ++ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); ++ dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); ++ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); + val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, zero_id); + + uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT_MAX, component_count); +@@ -7875,6 +8035,29 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + ++static void spirv_compiler_emit_dtof(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id, src_id; 
++ unsigned int component_count; ++ uint32_t write_mask; ++ ++ component_count = vsir_write_mask_component_count(dst->write_mask); ++ write_mask = vkd3d_write_mask_from_component_count(component_count); ++ ++ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); ++ ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); ++ val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpFConvert, type_id, src_id); ++ if (instruction->flags & VKD3DSI_PRECISE_XYZW) ++ vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ + static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -10214,13 +10397,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_TGSM_STRUCTURED: spirv_compiler_emit_dcl_tgsm_structured(compiler, instruction); break; @@ -11402,7 +15719,7 @@ index bdfd632ad12..a7b935543a0 100644 case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; -@@ -10239,9 +10386,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -10239,9 +10415,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: spirv_compiler_emit_output_vertex_count(compiler, instruction); break; @@ -11412,7 +15729,25 @@ index bdfd632ad12..a7b935543a0 100644 case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: spirv_compiler_emit_tessellator_output_primitive(compiler, instruction->declaration.tessellator_output_primitive); -@@ -10561,11 +10705,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -10275,7 +10448,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DDIV: + case VKD3DSIH_DIV: + case VKD3DSIH_DMUL: +- case VKD3DSIH_DTOF: + case VKD3DSIH_FREM: + case VKD3DSIH_FTOD: + case VKD3DSIH_IADD: +@@ -10363,6 +10535,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_FTOU: + spirv_compiler_emit_ftou(compiler, instruction); + break; ++ case VKD3DSIH_DTOF: ++ spirv_compiler_emit_dtof(compiler, instruction); ++ break; + case VKD3DSIH_DEQO: + case VKD3DSIH_DGEO: + case VKD3DSIH_DLT: +@@ -10561,11 +10736,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, break; case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: @@ -11424,7 +15759,7 @@ index bdfd632ad12..a7b935543a0 100644 case VKD3DSIH_DCL_RESOURCE_RAW: case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_RAW: -@@ -10586,6 +10725,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -10586,6 +10756,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) { @@ -11433,7 +15768,7 @@ index bdfd632ad12..a7b935543a0 100644 for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i); -@@ -10609,19 +10750,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) +@@ -10609,19 +10781,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) if (compiler->program->has_point_size) { @@ -11467,7 +15802,7 @@ 
index bdfd632ad12..a7b935543a0 100644 } } -@@ -10677,7 +10826,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10677,7 +10857,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compile_info, compiler->message_context)) < 0) return result; @@ -11476,7 +15811,7 @@ index bdfd632ad12..a7b935543a0 100644 max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -@@ -10743,6 +10892,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10743,6 +10923,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count; @@ -11486,7 +15821,7 @@ index bdfd632ad12..a7b935543a0 100644 if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler); -@@ -10823,7 +10975,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10823,7 +11006,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) { struct vkd3d_shader_code text; @@ -11497,13 +15832,26 @@ index bdfd632ad12..a7b935543a0 100644 vkd3d_shader_free_shader_code(spirv); *spirv = text; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 9c41e2c2053..872603052ac 100644 +index 9c41e2c2053..aa666086710 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -166,21 +166,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - /* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ - #define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 +@@ -21,9 +21,7 @@ + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ +-#include "hlsl.h" + #include "vkd3d_shader_private.h" +-#include "d3dcommon.h" + + #define SM4_MAX_SRC_COUNT 6 + #define SM4_MAX_DST_COUNT 2 +@@ -163,24 +161,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); + + #define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +-/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ +-#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 +- -#define VKD3D_SM4_REQUIRES_DOUBLES 0x00000001 -#define VKD3D_SM4_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002 -#define VKD3D_SM4_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004 @@ -11522,7 +15870,7 @@ index 9c41e2c2053..872603052ac 100644 enum vkd3d_sm4_opcode { VKD3D_SM4_OP_ADD = 0x00, -@@ -707,6 +692,7 @@ struct vkd3d_sm4_opcode_info +@@ -707,6 +687,7 @@ struct vkd3d_sm4_opcode_info char src_info[SM4_MAX_SRC_COUNT]; void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); @@ -11530,7 +15878,7 @@ index 9c41e2c2053..872603052ac 100644 }; static const enum vkd3d_primitive_type output_primitive_type_table[] = -@@ -1268,6 +1254,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi +@@ -1268,6 +1249,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi { ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; @@ -11538,7 +15886,7 @@ index 9c41e2c2053..872603052ac 100644 } static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1275,6 +1262,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins +@@ -1275,6 +1257,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins { ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; @@ -11546,7 +15894,7 @@ index 9c41e2c2053..872603052ac 100644 } static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1282,6 +1270,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader +@@ -1282,6 +1265,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader { ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; @@ -11554,7 +15902,7 @@ index 9c41e2c2053..872603052ac 100644 } static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1407,8 +1396,6 @@ struct sm4_stat +@@ -1407,8 +1391,6 @@ struct sm4_stat struct tpf_compiler { @@ -11563,7 +15911,7 @@ index 9c41e2c2053..872603052ac 100644 struct vsir_program *program; struct vkd3d_sm4_lookup_tables lookup; struct sm4_stat *stat; -@@ -1439,18 +1426,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1439,18 +1421,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", @@ -11585,7 +15933,7 @@ index 9c41e2c2053..872603052ac 100644 {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, -@@ -1468,7 +1455,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1468,7 +1450,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", @@ -11594,7 +15942,7 @@ index 9c41e2c2053..872603052ac 100644 {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, {VKD3D_SM4_OP_IGE, 
VKD3DSIH_IGE, "u", "ii"}, {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, -@@ -1502,7 +1489,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1502,7 +1484,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", @@ -11603,7 +15951,7 @@ index 9c41e2c2053..872603052ac 100644 {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, -@@ -1967,16 +1954,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t +@@ -1967,16 +1949,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t return lookup->register_type_info_from_vkd3d[vkd3d_type]; } @@ -11620,7 +15968,7 @@ index 9c41e2c2053..872603052ac 100644 static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) { -@@ -2816,7 +2793,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro +@@ -2816,7 +2788,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. */ if (!vsir_program_init(program, compile_info, @@ -11629,7 +15977,7 @@ index 9c41e2c2053..872603052ac 100644 return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; -@@ -2925,6 +2902,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +@@ -2925,6 +2897,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con program->input_signature = dxbc_desc.input_signature; program->output_signature = dxbc_desc.output_signature; program->patch_constant_signature = dxbc_desc.patch_constant_signature; @@ -11637,7 +15985,7 @@ index 9c41e2c2053..872603052ac 100644 memset(&dxbc_desc, 0, sizeof(dxbc_desc)); /* DXBC stores used masks inverted for output signatures, for some reason. 
-@@ -2993,8 +2971,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +@@ -2993,8 +2966,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; } @@ -11646,7 +15994,7 @@ index 9c41e2c2053..872603052ac 100644 bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { -@@ -3217,18 +3193,17 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s +@@ -3217,18 +3188,17 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s return true; } @@ -11668,7 +16016,7 @@ index 9c41e2c2053..872603052ac 100644 } static int signature_element_pointer_compare(const void *x, const void *y) -@@ -3289,7 +3264,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si +@@ -3289,747 +3259,33 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); } @@ -11677,18 +16025,220 @@ index 9c41e2c2053..872603052ac 100644 vkd3d_free(sorted_elements); } -@@ -3331,6 +3306,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -3507,28 +3483,6 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - } - } - +-static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +-{ +- switch (type->class) +- { +- case HLSL_CLASS_MATRIX: +- VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); +- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) +- return D3D_SVC_MATRIX_COLUMNS; +- else +- return D3D_SVC_MATRIX_ROWS; +- case HLSL_CLASS_SCALAR: +- return D3D_SVC_SCALAR; +- case HLSL_CLASS_VECTOR: +- return D3D_SVC_VECTOR; +- +- case HLSL_CLASS_ARRAY: +- case HLSL_CLASS_DEPTH_STENCIL_STATE: +- case HLSL_CLASS_DEPTH_STENCIL_VIEW: +- case HLSL_CLASS_EFFECT_GROUP: +- case HLSL_CLASS_ERROR: +- case HLSL_CLASS_STRUCT: +- case HLSL_CLASS_PASS: +- case HLSL_CLASS_PIXEL_SHADER: +- case HLSL_CLASS_RASTERIZER_STATE: +- case HLSL_CLASS_RENDER_TARGET_VIEW: +- case HLSL_CLASS_SAMPLER: +- case HLSL_CLASS_STRING: +- case HLSL_CLASS_TECHNIQUE: +- case HLSL_CLASS_TEXTURE: +- case HLSL_CLASS_UAV: +- case HLSL_CLASS_VERTEX_SHADER: +- case HLSL_CLASS_VOID: +- case HLSL_CLASS_CONSTANT_BUFFER: +- case HLSL_CLASS_COMPUTE_SHADER: +- case HLSL_CLASS_DOMAIN_SHADER: +- case HLSL_CLASS_HULL_SHADER: +- case HLSL_CLASS_GEOMETRY_SHADER: +- case HLSL_CLASS_BLEND_STATE: +- case HLSL_CLASS_NULL: +- break; +- } +- vkd3d_unreachable(); +-} +- +-static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +-{ +- switch (type->e.numeric.type) +- { +- case HLSL_TYPE_BOOL: +- return D3D_SVT_BOOL; +- case HLSL_TYPE_DOUBLE: +- return D3D_SVT_DOUBLE; +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3D_SVT_FLOAT; +- case HLSL_TYPE_INT: +- return D3D_SVT_INT; +- case HLSL_TYPE_UINT: +- return D3D_SVT_UINT; +- default: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +-{ +- const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); +- const char *name = array_type->name ? 
array_type->name : ""; +- const struct hlsl_profile_info *profile = ctx->profile; +- unsigned int array_size = 0; +- size_t name_offset = 0; +- size_t i; +- +- if (type->bytecode_offset) +- return; +- +- if (profile->major_version >= 5) +- name_offset = put_string(buffer, name); +- +- if (type->class == HLSL_CLASS_ARRAY) +- array_size = hlsl_get_multiarray_size(type); +- +- if (array_type->class == HLSL_CLASS_STRUCT) +- { +- unsigned int field_count = 0; +- size_t fields_offset = 0; +- +- for (i = 0; i < array_type->e.record.field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) +- continue; +- +- field->name_bytecode_offset = put_string(buffer, field->name); +- write_sm4_type(ctx, buffer, field->type); +- ++field_count; +- } +- +- fields_offset = bytecode_align(buffer); +- +- for (i = 0; i < array_type->e.record.field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) +- continue; +- +- put_u32(buffer, field->name_bytecode_offset); +- put_u32(buffer, field->type->bytecode_offset); +- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); +- } +- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); +- put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); +- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); +- put_u32(buffer, fields_offset); +- } +- else +- { +- VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); +- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); +- put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); +- put_u32(buffer, vkd3d_make_u32(array_size, 0)); +- put_u32(buffer, 1); +- } +- +- if (profile->major_version >= 5) +- { +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, name_offset); +- } +-} +- +-static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +-{ +- switch (type->class) +- { +- case HLSL_CLASS_SAMPLER: +- return D3D_SIT_SAMPLER; +- case HLSL_CLASS_TEXTURE: +- return D3D_SIT_TEXTURE; +- case HLSL_CLASS_UAV: +- return D3D_SIT_UAV_RWTYPED; +- default: +- break; +- } +- +- vkd3d_unreachable(); +-} +- +-static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) +-{ +- const struct hlsl_type *format = type->e.resource.format; +- +- switch (format->e.numeric.type) +- { +- case HLSL_TYPE_DOUBLE: +- return VKD3D_SM4_DATA_DOUBLE; +- +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- if (format->modifiers & HLSL_MODIFIER_UNORM) +- return VKD3D_SM4_DATA_UNORM; +- if (format->modifiers & HLSL_MODIFIER_SNORM) +- return VKD3D_SM4_DATA_SNORM; +- return VKD3D_SM4_DATA_FLOAT; +- +- case HLSL_TYPE_INT: +- return VKD3D_SM4_DATA_INT; +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- return VKD3D_SM4_DATA_UINT; +- +- default: +- vkd3d_unreachable(); +- } +-} +- +-static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) +-{ +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SRV_DIMENSION_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SRV_DIMENSION_TEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SRV_DIMENSION_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return 
D3D_SRV_DIMENSION_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_1DARRAY: +- return D3D_SRV_DIMENSION_TEXTURE1DARRAY; +- case HLSL_SAMPLER_DIM_2DARRAY: +- return D3D_SRV_DIMENSION_TEXTURE2DARRAY; +- case HLSL_SAMPLER_DIM_2DMS: +- return D3D_SRV_DIMENSION_TEXTURE2DMS; +- case HLSL_SAMPLER_DIM_2DMSARRAY: +- return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; +- case HLSL_SAMPLER_DIM_CUBEARRAY: +- return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; +- case HLSL_SAMPLER_DIM_BUFFER: +- case HLSL_SAMPLER_DIM_RAW_BUFFER: +- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: +- return D3D_SRV_DIMENSION_BUFFER; +- default: +- vkd3d_unreachable(); +- } +-} +- -struct extern_resource -{ - /* var is only not NULL if this resource is a whole variable, so it may be responsible for more @@ -11711,54 +16261,482 @@ index 9c41e2c2053..872603052ac 100644 - struct vkd3d_shader_location loc; -}; - - static int sm4_compare_extern_resources(const void *a, const void *b) - { - const struct extern_resource *aa = (const struct extern_resource *)a; -@@ -3544,7 +3498,7 @@ static int sm4_compare_extern_resources(const void *a, const void *b) - return vkd3d_u32_compare(aa->index, bb->index); - } - +-static int sm4_compare_extern_resources(const void *a, const void *b) +-{ +- const struct extern_resource *aa = (const struct extern_resource *)a; +- const struct extern_resource *bb = (const struct extern_resource *)b; +- int r; +- +- if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) +- return r; +- +- if ((r = vkd3d_u32_compare(aa->space, bb->space))) +- return r; +- +- return vkd3d_u32_compare(aa->index, bb->index); +-} +- -static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) - { - unsigned int i; - -@@ -3560,7 +3514,7 @@ static const char *string_skip_tag(const char *string) - return string; - } - +-{ +- unsigned int i; +- +- for (i = 0; i < count; ++i) +- vkd3d_free(extern_resources[i].name); +- vkd3d_free(extern_resources); +-} +- +-static const char *string_skip_tag(const char *string) +-{ +- if (!strncmp(string, "", strlen(""))) +- return string + strlen(""); +- return string; +-} +- -static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) - { - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; -@@ -3770,7 +3724,7 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl - vkd3d_unreachable(); - } - +-{ +- bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; +- struct extern_resource *extern_resources = NULL; +- const struct hlsl_ir_var *var; +- struct hlsl_buffer *buffer; +- enum hlsl_regset regset; +- size_t capacity = 0; +- char *name; +- +- *count = 0; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (separate_components) +- { +- unsigned int component_count = hlsl_type_component_count(var->data_type); +- unsigned int k, regset_offset; +- +- for (k = 0; k < component_count; ++k) +- { +- struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); +- struct vkd3d_string_buffer *name_buffer; +- +- if (!hlsl_type_is_resource(component_type)) +- continue; +- +- regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); 
+- +- if (regset_offset > var->regs[regset].allocation_size) +- continue; +- +- if (var->objects_usage[regset][regset_offset].used) +- { +- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, +- sizeof(*extern_resources)))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- return NULL; +- } +- +- if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- return NULL; +- } +- if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- hlsl_release_string_buffer(ctx, name_buffer); +- return NULL; +- } +- hlsl_release_string_buffer(ctx, name_buffer); +- +- extern_resources[*count].var = NULL; +- extern_resources[*count].buffer = NULL; +- +- extern_resources[*count].name = name; +- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; +- +- extern_resources[*count].component_type = component_type; +- +- extern_resources[*count].regset = regset; +- extern_resources[*count].id = var->regs[regset].id; +- extern_resources[*count].space = var->regs[regset].space; +- extern_resources[*count].index = var->regs[regset].index + regset_offset; +- extern_resources[*count].bind_count = 1; +- extern_resources[*count].loc = var->loc; +- +- ++*count; +- } +- } +- } +- else +- { +- unsigned int r; +- +- if (!hlsl_type_is_resource(var->data_type)) +- continue; +- +- for (r = 0; r <= HLSL_REGSET_LAST; ++r) +- { +- if (!var->regs[r].allocated) +- continue; +- +- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, +- sizeof(*extern_resources)))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- return NULL; +- } +- +- if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- return NULL; +- } +- +- extern_resources[*count].var = var; +- extern_resources[*count].buffer = NULL; +- +- extern_resources[*count].name = name; +- /* For some reason 5.1 resources aren't marked as +- * user-packed, but cbuffers still are. 
*/ +- extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) +- && !!var->reg_reservation.reg_type; +- +- extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); +- +- extern_resources[*count].regset = r; +- extern_resources[*count].id = var->regs[r].id; +- extern_resources[*count].space = var->regs[r].space; +- extern_resources[*count].index = var->regs[r].index; +- extern_resources[*count].bind_count = var->bind_count[r]; +- extern_resources[*count].loc = var->loc; +- +- ++*count; +- } +- } +- } +- +- LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (!buffer->reg.allocated) +- continue; +- +- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, +- sizeof(*extern_resources)))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- return NULL; +- } +- +- if (!(name = hlsl_strdup(ctx, buffer->name))) +- { +- sm4_free_extern_resources(extern_resources, *count); +- *count = 0; +- return NULL; +- } +- +- extern_resources[*count].var = NULL; +- extern_resources[*count].buffer = buffer; +- +- extern_resources[*count].name = name; +- extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; +- +- extern_resources[*count].component_type = NULL; +- +- extern_resources[*count].regset = HLSL_REGSET_NUMERIC; +- extern_resources[*count].id = buffer->reg.id; +- extern_resources[*count].space = buffer->reg.space; +- extern_resources[*count].index = buffer->reg.index; +- extern_resources[*count].bind_count = 1; +- extern_resources[*count].loc = buffer->loc; +- +- ++*count; +- } +- +- qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); +- return extern_resources; +-} +- +-/* For some reason, for matrices, values from default value initializers end up in different +- * components than from regular initializers. Default value initializers fill the matrix in +- * vertical reading order (left-to-right top-to-bottom) instead of regular reading order +- * (top-to-bottom left-to-right), so they have to be adjusted. +- * An exception is that the order of matrix initializers for function parameters are row-major +- * (top-to-bottom left-to-right). 
*/ +-static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) +-{ +- unsigned int element_comp_count, element, x, y, i; +- unsigned int base = 0; +- +- switch (type->class) +- { +- case HLSL_CLASS_MATRIX: +- x = index / type->dimy; +- y = index % type->dimy; +- return y * type->dimx + x; +- +- case HLSL_CLASS_ARRAY: +- element_comp_count = hlsl_type_component_count(type->e.array.type); +- element = index / element_comp_count; +- base = element * element_comp_count; +- return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); +- +- case HLSL_CLASS_STRUCT: +- for (i = 0; i < type->e.record.field_count; ++i) +- { +- struct hlsl_type *field_type = type->e.record.fields[i].type; +- +- element_comp_count = hlsl_type_component_count(field_type); +- if (index - base < element_comp_count) +- return base + get_component_index_from_default_initializer_index(field_type, index - base); +- base += element_comp_count; +- } +- break; +- +- default: +- return index; +- } +- vkd3d_unreachable(); +-} +- -static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -+void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) - { - uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -@@ -4000,36 +3954,41 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - +-{ +- uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); +- size_t cbuffers_offset, resources_offset, creator_offset, string_offset; +- unsigned int cbuffer_count = 0, extern_resources_count, i, j; +- size_t cbuffer_position, resource_position, creator_position; +- const struct hlsl_profile_info *profile = ctx->profile; +- struct vkd3d_bytecode_buffer buffer = {0}; +- struct extern_resource *extern_resources; +- const struct hlsl_buffer *cbuffer; +- const struct hlsl_ir_var *var; +- +- static const uint16_t target_types[] = +- { +- 0xffff, /* PIXEL */ +- 0xfffe, /* VERTEX */ +- 0x4753, /* GEOMETRY */ +- 0x4853, /* HULL */ +- 0x4453, /* DOMAIN */ +- 0x4353, /* COMPUTE */ +- }; +- +- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (cbuffer->reg.allocated) +- ++cbuffer_count; +- } +- +- put_u32(&buffer, cbuffer_count); +- cbuffer_position = put_u32(&buffer, 0); +- put_u32(&buffer, extern_resources_count); +- resource_position = put_u32(&buffer, 0); +- put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), +- target_types[profile->type])); +- put_u32(&buffer, 0); /* FIXME: compilation flags */ +- creator_position = put_u32(&buffer, 0); +- +- if (profile->major_version >= 5) +- { +- put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? 
TAG_RD11_REVERSE : TAG_RD11); +- put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ +- put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ +- put_u32(&buffer, binding_desc_size); /* size of binding desc */ +- put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ +- put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ +- put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ +- put_u32(&buffer, 0); /* unknown; possibly a null terminator */ +- } +- +- /* Bound resources. */ +- +- resources_offset = bytecode_align(&buffer); +- set_u32(&buffer, resource_position, resources_offset); +- +- for (i = 0; i < extern_resources_count; ++i) +- { +- const struct extern_resource *resource = &extern_resources[i]; +- uint32_t flags = 0; +- +- if (resource->is_user_packed) +- flags |= D3D_SIF_USERPACKED; +- +- put_u32(&buffer, 0); /* name */ +- if (resource->buffer) +- put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); +- else +- put_u32(&buffer, sm4_resource_type(resource->component_type)); +- if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) +- { +- unsigned int dimx = resource->component_type->e.resource.format->dimx; +- +- put_u32(&buffer, sm4_data_type(resource->component_type)); +- put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); +- put_u32(&buffer, ~0u); /* FIXME: multisample count */ +- flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; +- } +- else +- { +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- } +- put_u32(&buffer, resource->index); +- put_u32(&buffer, resource->bind_count); +- put_u32(&buffer, flags); +- +- if (hlsl_version_ge(ctx, 5, 1)) +- { +- put_u32(&buffer, resource->space); +- put_u32(&buffer, resource->id); +- } +- } +- +- for (i = 0; i < extern_resources_count; ++i) +- { +- const struct extern_resource *resource = &extern_resources[i]; +- +- string_offset = put_string(&buffer, resource->name); +- set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); +- } +- +- /* Buffers. */ +- +- cbuffers_offset = bytecode_align(&buffer); +- set_u32(&buffer, cbuffer_position, cbuffers_offset); +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- unsigned int var_count = 0; +- +- if (!cbuffer->reg.allocated) +- continue; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) +- ++var_count; +- } +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, var_count); +- put_u32(&buffer, 0); /* variable offset */ +- put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); +- put_u32(&buffer, 0); /* FIXME: flags */ +- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); +- } +- +- i = 0; +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (!cbuffer->reg.allocated) +- continue; +- +- string_offset = put_string(&buffer, cbuffer->name); +- set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); +- } +- +- i = 0; +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- size_t vars_start = bytecode_align(&buffer); +- +- if (!cbuffer->reg.allocated) +- continue; +- +- set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) +- { +- uint32_t flags = 0; +- +- if (var->is_read) +- flags |= D3D_SVF_USED; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, var->buffer_offset * sizeof(float)); +- put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); +- put_u32(&buffer, flags); +- put_u32(&buffer, 0); /* type */ +- put_u32(&buffer, 0); /* default value */ +- +- if (profile->major_version >= 5) +- { +- put_u32(&buffer, 0); /* texture start */ +- put_u32(&buffer, 0); /* texture count */ +- put_u32(&buffer, 0); /* sampler start */ +- put_u32(&buffer, 0); /* sampler count */ +- } +- } +- } +- +- j = 0; +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) +- { +- const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); +- size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); +- +- string_offset = put_string(&buffer, var->name); +- set_u32(&buffer, var_offset, string_offset); +- write_sm4_type(ctx, &buffer, var->data_type); +- set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); +- +- if (var->default_values) +- { +- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; +- unsigned int comp_count = hlsl_type_component_count(var->data_type); +- unsigned int default_value_offset; +- unsigned int k; +- +- default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); +- set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); +- +- for (k = 0; k < comp_count; ++k) +- { +- struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); +- unsigned int comp_offset, comp_index; +- enum hlsl_regset regset; +- +- if (comp_type->class == HLSL_CLASS_STRING) +- { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Cannot write string default value."); +- continue; +- } +- +- comp_index = get_component_index_from_default_initializer_index(var->data_type, k); +- comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); +- if (regset == HLSL_REGSET_NUMERIC) +- { +- if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) +- hlsl_fixme(ctx, &var->loc, "Write double default values."); +- +- set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), +- var->default_values[k].number.u); +- } +- } +- } +- ++j; +- } +- } +- } +- +- creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); +- set_u32(&buffer, creator_position, creator_offset); +- - add_section(ctx, dxbc, TAG_RDEF, &buffer); - - sm4_free_extern_resources(extern_resources, extern_resources_count); -+ -+ if (buffer.status) -+ { -+ 
vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ rdef->code = buffer.data; -+ rdef->size = buffer.size; - } - +- sm4_free_extern_resources(extern_resources, extern_resources_count); +-} +- -static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type) { @@ -11799,7 +16777,7 @@ index 9c41e2c2053..872603052ac 100644 return VKD3D_SM4_RESOURCE_BUFFER; default: vkd3d_unreachable(); -@@ -4096,297 +4055,6 @@ struct sm4_instruction +@@ -4096,297 +3352,6 @@ struct sm4_instruction unsigned int idx_src_count; }; @@ -12097,14 +17075,13 @@ index 9c41e2c2053..872603052ac 100644 static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, unsigned int i) { -@@ -4650,204 +4318,41 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s +@@ -4650,204 +3615,41 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s sm4_update_stat_counters(tpf, instr); } -static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - const struct hlsl_ir_node *texel_offset) -+static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) - { +-{ - struct sm4_instruction_modifier modif; - struct hlsl_ir_constant *offset; - @@ -12130,7 +17107,8 @@ index 9c41e2c2053..872603052ac 100644 -} - -static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) --{ ++static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { - size_t size = (cbuffer->used_size + 3) / 4; + const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb; + size_t size = (cb->size + 3) / 4; @@ -12315,7 +17293,7 @@ index 9c41e2c2053..872603052ac 100644 } static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) -@@ -4924,42 +4429,116 @@ static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vs +@@ -4924,42 +3726,116 @@ static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vs write_sm4_instruction(tpf, &instr); } @@ -12334,38 +17312,29 @@ index 9c41e2c2053..872603052ac 100644 + .dst_count = 1, }; -- write_sm4_instruction(tpf, &instr); --} + if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) + { + instr.dsts[0].reg.idx[0].offset = sampler->src.reg.idx[0].offset; + instr.dsts[0].reg.idx[1].offset = sampler->range.first; + instr.dsts[0].reg.idx[2].offset = sampler->range.last; + instr.dsts[0].reg.idx_count = 3; - --static void tpf_write_hs_decls(const struct tpf_compiler *tpf) --{ -- struct sm4_instruction instr = ++ + instr.idx[0] = ins->declaration.sampler.range.space; + instr.idx_count = 1; + } + else - { -- .opcode = VKD3D_SM5_OP_HS_DECLS, -- }; ++ { + instr.dsts[0].reg.idx[0].offset = sampler->range.first; + instr.dsts[0].reg.idx_count = 1; + } - ++ write_sm4_instruction(tpf, &instr); } --static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) +-static void tpf_write_hs_decls(const struct tpf_compiler *tpf) +static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { - struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, + const struct vkd3d_shader_structured_resource *structured_resource = &ins->declaration.structured_resource; + const struct vkd3d_shader_semantic 
*semantic = &ins->declaration.semantic; + const struct vkd3d_shader_version *version = &tpf->program->shader_version; @@ -12387,7 +17356,9 @@ index 9c41e2c2053..872603052ac 100644 + instr.dst_count = 1; + + for (k = 0; k < 4; ++k) -+ { + { +- .opcode = VKD3D_SM5_OP_HS_DECLS, +- }; + for (i = ARRAY_SIZE(data_type_table) - 1; i < ARRAY_SIZE(data_type_table); --i) + { + if (semantic->resource_data_type[k] == data_type_table[i]) @@ -12423,14 +17394,16 @@ index 9c41e2c2053..872603052ac 100644 + + if (ins->structured) + instr.byte_stride = structured_resource->byte_stride; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) +static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) -+{ -+ struct sm4_instruction instr = -+ { + { + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, + .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, + .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, }; @@ -12448,7 +17421,7 @@ index 9c41e2c2053..872603052ac 100644 }; write_sm4_instruction(tpf, &instr); -@@ -5022,594 +4601,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler +@@ -5022,594 +3898,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler write_sm4_instruction(tpf, &instr); } @@ -13038,13 +18011,14 @@ index 9c41e2c2053..872603052ac 100644 - write_sm4_instruction(tpf, &instr); -} - - static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) +-static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) ++static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { + struct sm4_instruction_modifier *modifier; const struct vkd3d_sm4_opcode_info *info; struct sm4_instruction instr = {0}; unsigned int dst_count, src_count; -@@ -5655,6 +4649,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5655,6 +3946,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ for (unsigned int i = 0; i < ins->src_count; ++i) instr.srcs[i] = ins->src[i]; @@ -13067,7 +18041,7 @@ index 9c41e2c2053..872603052ac 100644 write_sm4_instruction(tpf, &instr); } -@@ -5662,6 +4672,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5662,6 +3969,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ { switch (ins->opcode) { @@ -13078,7 +18052,7 @@ index 9c41e2c2053..872603052ac 100644 case VKD3DSIH_DCL_TEMPS: tpf_dcl_temps(tpf, ins->declaration.count); break; -@@ -5702,8 +4716,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5702,8 +4013,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); break; @@ -13104,7 +18078,7 @@ index 9c41e2c2053..872603052ac 100644 case VKD3DSIH_DIV: case VKD3DSIH_DP2: case VKD3DSIH_DP3: -@@ -5714,6 +4745,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5714,6 +4042,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_DSY: case VKD3DSIH_DSY_COARSE: case VKD3DSIH_DSY_FINE: @@ -13115,12 +18089,14 @@ index 9c41e2c2053..872603052ac 100644 case 
VKD3DSIH_EQO: case VKD3DSIH_EXP: case VKD3DSIH_F16TOF32: -@@ -5721,9 +4756,14 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5721,9 +4053,16 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_FRC: case VKD3DSIH_FTOI: case VKD3DSIH_FTOU: + case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_PO: ++ case VKD3DSIH_GATHER4_C: ++ case VKD3DSIH_GATHER4_PO_C: case VKD3DSIH_GEO: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: @@ -13130,7 +18106,7 @@ index 9c41e2c2053..872603052ac 100644 case VKD3DSIH_IGE: case VKD3DSIH_ILT: case VKD3DSIH_IMAD: -@@ -5735,7 +4775,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5735,7 +4074,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_ISHL: case VKD3DSIH_ISHR: case VKD3DSIH_ITOF: @@ -13143,7 +18119,7 @@ index 9c41e2c2053..872603052ac 100644 case VKD3DSIH_LTO: case VKD3DSIH_MAD: case VKD3DSIH_MAX: -@@ -5747,14 +4792,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5747,14 +4091,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_NOT: case VKD3DSIH_OR: case VKD3DSIH_RCP: @@ -13169,7 +18145,7 @@ index 9c41e2c2053..872603052ac 100644 case VKD3DSIH_UDIV: case VKD3DSIH_UGE: case VKD3DSIH_ULT: -@@ -5772,102 +4828,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ +@@ -5772,102 +4127,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ } } @@ -13279,7 +18255,7 @@ index 9c41e2c2053..872603052ac 100644 static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { -@@ -5884,101 +4861,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec +@@ -5884,101 +4160,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec tpf->buffer = &buffer; @@ -13395,7 +18371,7 @@ index 9c41e2c2053..872603052ac 100644 /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ -@@ -5993,7 +4914,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) +@@ -5993,7 +4213,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) { struct vkd3d_bytecode_buffer buffer = {0}; const struct sm4_stat *stat = tpf->stat; @@ -13403,7 +18379,7 @@ index 9c41e2c2053..872603052ac 100644 put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); -@@ -6025,7 +4945,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) +@@ -6025,7 +4244,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); put_u32(&buffer, 0); /* Sample frequency */ @@ -13412,7 +18388,7 @@ index 9c41e2c2053..872603052ac 100644 { put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); -@@ -6037,15 +4957,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) +@@ -6037,15 +4256,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); } @@ -13439,7 +18415,7 @@ index 9c41e2c2053..872603052ac 100644 { enum vkd3d_shader_type shader_type = program->shader_version.type; struct tpf_compiler tpf = {0}; -@@ -6053,7 +4977,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, +@@ -6053,7 +4276,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, size_t i; int ret; @@ -13447,7 +18423,7 @@ index 9c41e2c2053..872603052ac 100644 tpf.program = program; tpf.buffer = NULL; tpf.stat = &stat; -@@ -6064,14 +4987,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, +@@ -6064,14 +4286,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); @@ -13465,7 +18441,7 @@ index 9c41e2c2053..872603052ac 100644 ret = tpf.result; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index d751f2dc6bf..86ec8f15fb7 100644 +index d751f2dc6bf..c7ad407f6fb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ @@ -13477,7 +18453,27 @@ index d751f2dc6bf..86ec8f15fb7 100644 static inline int char_to_int(char c) { if ('0' <= c && c <= '9') -@@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context +@@ -377,7 +379,8 @@ size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) + return aligned_size; + } + +- memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); ++ if (aligned_size > buffer->size) ++ memset(&buffer->data[buffer->size], 0xab, aligned_size - buffer->size); + buffer->size = aligned_size; + return aligned_size; + } +@@ -394,7 +397,8 @@ size_t bytecode_put_bytes_unaligned(struct vkd3d_bytecode_buffer *buffer, const + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return offset; + } +- memcpy(buffer->data + offset, bytes, size); ++ if (size) ++ memcpy(&buffer->data[offset], bytes, size); + buffer->size = offset + size; + return offset; + } +@@ -805,6 +809,9 @@ struct vkd3d_shader_scan_context struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; size_t combined_samplers_size; @@ -13487,7 +18483,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 }; static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, -@@ 
-1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte +@@ -1262,6 +1269,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, instruction->declaration.structured_resource.byte_stride, false, instruction->flags); break; @@ -13500,7 +18496,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 case VKD3DSIH_IF: case VKD3DSIH_IFC: cf_info = vkd3d_shader_scan_push_cf_info(context); -@@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh +@@ -1502,6 +1515,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) { struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; @@ -13508,7 +18504,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; struct vkd3d_shader_scan_descriptor_info *descriptor_info; struct vkd3d_shader_scan_signature_info *signature_info; -@@ -1530,6 +1542,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh +@@ -1530,6 +1544,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh descriptor_info1 = &local_descriptor_info1; } @@ -13517,7 +18513,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, descriptor_info1, combined_sampler_info, message_context); -@@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh +@@ -1573,6 +1589,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh if (!ret && descriptor_info) ret = convert_descriptor_info(descriptor_info, descriptor_info1); @@ -13530,7 +18526,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 if (ret < 0) { if (combined_sampler_info) -@@ -1959,7 +1979,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1959,7 +1981,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type dxbc_tpf_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, @@ -13539,7 +18535,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, -@@ -1974,7 +1994,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1974,7 +1996,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type hlsl_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, @@ -13548,7 +18544,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, -@@ -1986,7 +2006,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1986,7 +2008,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type d3dbc_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, @@ -13557,7 +18553,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, -@@ -1996,7 +2016,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1996,7 +2018,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type dxbc_dxil_types[] = { 
VKD3D_SHADER_TARGET_SPIRV_BINARY, @@ -13567,7 +18563,7 @@ index d751f2dc6bf..86ec8f15fb7 100644 # endif VKD3D_SHADER_TARGET_D3D_ASM, diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index be7c0b73a22..3bfb0a7c3cd 100644 +index be7c0b73a22..7e8ec156aad 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -62,6 +62,8 @@ @@ -13595,7 +18591,55 @@ index be7c0b73a22..3bfb0a7c3cd 100644 VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, -@@ -1123,6 +1127,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, +@@ -591,28 +595,25 @@ enum vkd3d_shader_opcode + + enum vkd3d_shader_register_type + { +- VKD3DSPR_TEMP = 0, +- VKD3DSPR_INPUT = 1, +- VKD3DSPR_CONST = 2, +- VKD3DSPR_ADDR = 3, +- VKD3DSPR_TEXTURE = 3, +- VKD3DSPR_RASTOUT = 4, +- VKD3DSPR_ATTROUT = 5, +- VKD3DSPR_TEXCRDOUT = 6, +- VKD3DSPR_OUTPUT = 6, +- VKD3DSPR_CONSTINT = 7, +- VKD3DSPR_COLOROUT = 8, +- VKD3DSPR_DEPTHOUT = 9, +- VKD3DSPR_COMBINED_SAMPLER = 10, +- VKD3DSPR_CONST2 = 11, +- VKD3DSPR_CONST3 = 12, +- VKD3DSPR_CONST4 = 13, +- VKD3DSPR_CONSTBOOL = 14, +- VKD3DSPR_LOOP = 15, +- VKD3DSPR_TEMPFLOAT16 = 16, +- VKD3DSPR_MISCTYPE = 17, +- VKD3DSPR_LABEL = 18, +- VKD3DSPR_PREDICATE = 19, ++ VKD3DSPR_TEMP, ++ VKD3DSPR_INPUT, ++ VKD3DSPR_CONST, ++ VKD3DSPR_ADDR, ++ VKD3DSPR_TEXTURE, ++ VKD3DSPR_RASTOUT, ++ VKD3DSPR_ATTROUT, ++ VKD3DSPR_TEXCRDOUT, ++ VKD3DSPR_OUTPUT, ++ VKD3DSPR_CONSTINT, ++ VKD3DSPR_COLOROUT, ++ VKD3DSPR_DEPTHOUT, ++ VKD3DSPR_COMBINED_SAMPLER, ++ VKD3DSPR_CONSTBOOL, ++ VKD3DSPR_LOOP, ++ VKD3DSPR_TEMPFLOAT16, ++ VKD3DSPR_MISCTYPE, ++ VKD3DSPR_LABEL, ++ VKD3DSPR_PREDICATE, + VKD3DSPR_IMMCONST, + VKD3DSPR_IMMCONST64, + VKD3DSPR_CONSTBUFFER, +@@ -1123,6 +1124,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); void shader_signature_cleanup(struct shader_signature *signature); @@ -13608,7 +18652,7 @@ index be7c0b73a22..3bfb0a7c3cd 100644 struct dxbc_shader_desc { const uint32_t *byte_code; -@@ -1131,6 +1141,7 @@ struct dxbc_shader_desc +@@ -1131,6 +1138,7 @@ struct dxbc_shader_desc struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; @@ -13616,7 +18660,7 @@ index be7c0b73a22..3bfb0a7c3cd 100644 }; struct vkd3d_shader_register_semantic -@@ -1400,9 +1411,10 @@ enum vsir_control_flow_type +@@ -1400,9 +1408,10 @@ enum vsir_control_flow_type enum vsir_normalisation_level { @@ -13629,7 +18673,7 @@ index be7c0b73a22..3bfb0a7c3cd 100644 }; struct vsir_program -@@ -1428,9 +1440,16 @@ struct vsir_program +@@ -1428,9 +1437,16 @@ struct vsir_program bool use_vocp; bool has_point_size; bool has_point_coord; @@ -13646,7 +18690,7 @@ index be7c0b73a22..3bfb0a7c3cd 100644 const char **block_names; size_t block_name_count; -@@ -1643,6 +1662,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +@@ -1643,6 +1659,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); @@ -13657,7 +18701,7 @@ index be7c0b73a22..3bfb0a7c3cd 100644 int glsl_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, const 
struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, -@@ -1661,6 +1684,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1661,6 +1681,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); @@ -13667,7 +18711,7 @@ index be7c0b73a22..3bfb0a7c3cd 100644 enum vkd3d_md5_variant { VKD3D_MD5_STANDARD, -@@ -1942,6 +1968,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, +@@ -1942,6 +1965,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define DXBC_MAX_SECTION_COUNT 7 @@ -14187,7 +19231,7 @@ index a55a97f6f2f..6c7bf167910 100644 ERR("Failed to allocate descriptor set.\n"); return; diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index e92373a36fa..7b491805a72 100644 +index e92373a36fa..9aa4adb6c06 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des @@ -14267,8 +19311,16 @@ index e92373a36fa..7b491805a72 100644 }; static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) +@@ -3461,6 +3443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + for (i = 0; i < data->NumFeatureLevels; ++i) + { + D3D_FEATURE_LEVEL fl = data->pFeatureLevelsRequested[i]; ++ TRACE("Requested feature level %#x.\n", fl); + if (data->MaxSupportedFeatureLevel < fl && fl <= vulkan_info->max_feature_level) + data->MaxSupportedFeatureLevel = fl; + } diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 2b0f81d3812..32f34479ea1 100644 +index 2b0f81d3812..bd3c3758ecb 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -265,25 +265,6 @@ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHA @@ -14297,7 +19349,19 @@ index 2b0f81d3812..32f34479ea1 100644 static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( D3D12_DESCRIPTOR_RANGE_TYPE type) { -@@ -717,6 +698,8 @@ struct vk_binding_array +@@ -597,8 +578,9 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i + goto done; + } + +- qsort(info->ranges, info->range_count, sizeof(*info->ranges), +- d3d12_root_signature_info_range_compare); ++ if (info->ranges) ++ qsort(info->ranges, info->range_count, sizeof(*info->ranges), ++ d3d12_root_signature_info_range_compare); + + for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i) + { +@@ -717,6 +699,8 @@ struct vk_binding_array VkDescriptorSetLayoutBinding *bindings; size_t capacity, count; @@ -14306,7 +19370,7 @@ index 2b0f81d3812..32f34479ea1 100644 unsigned int table_index; unsigned int unbounded_offset; VkDescriptorSetLayoutCreateFlags flags; -@@ -754,14 +737,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array, +@@ -754,14 +738,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array, return true; } @@ -14332,7 +19396,7 @@ index 2b0f81d3812..32f34479ea1 100644 }; static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) -@@ -786,46 +779,66 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns +@@ -786,46 +780,66 @@ static bool vkd3d_validate_descriptor_set_count(struct 
d3d12_device *device, uns return true; } @@ -14420,7 +19484,7 @@ index 2b0f81d3812..32f34479ea1 100644 return E_OUTOFMEMORY; mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; -@@ -834,7 +847,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur +@@ -834,7 +848,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur mapping->register_index = register_idx; mapping->shader_visibility = shader_visibility; mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; @@ -14429,7 +19493,7 @@ index 2b0f81d3812..32f34479ea1 100644 mapping->binding.binding = idx; mapping->binding.count = descriptor_count; if (offset) -@@ -843,12 +856,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur +@@ -843,12 +857,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur offset->dynamic_offset_index = ~0u; } @@ -14442,7 +19506,7 @@ index 2b0f81d3812..32f34479ea1 100644 return S_OK; } -@@ -911,7 +918,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro +@@ -911,7 +919,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro } static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, @@ -14451,7 +19515,7 @@ index 2b0f81d3812..32f34479ea1 100644 unsigned int vk_binding_array_count, unsigned int bindings_per_range, struct vkd3d_descriptor_set_context *context) { -@@ -919,34 +926,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r +@@ -919,34 +927,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; enum vkd3d_shader_descriptor_type descriptor_type = range->type; unsigned int i, register_space = range->register_space; @@ -14511,7 +19575,7 @@ index 2b0f81d3812..32f34479ea1 100644 return S_OK; } -@@ -1199,16 +1221,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo +@@ -1199,16 +1222,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo if (use_vk_heaps) { @@ -14531,7 +19595,7 @@ index 2b0f81d3812..32f34479ea1 100644 if (root_signature->use_descriptor_arrays) { if (j && range->type != table->ranges[j - 1].type) -@@ -1229,6 +1251,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo +@@ -1229,6 +1252,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo range->set = base_range->set; range->binding = base_range->binding; @@ -14540,7 +19604,7 @@ index 2b0f81d3812..32f34479ea1 100644 range->vk_binding_count = base_range->vk_binding_count - rel_offset; d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range, rel_offset, shader_visibility, context); -@@ -1251,8 +1275,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo +@@ -1251,8 +1276,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo bindings_per_range = range->descriptor_count; } @@ -14549,7 +19613,7 @@ index 2b0f81d3812..32f34479ea1 100644 if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) return hr; -@@ -1266,7 +1288,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo 
+@@ -1266,7 +1289,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { @@ -14560,7 +19624,7 @@ index 2b0f81d3812..32f34479ea1 100644 HRESULT hr; root_signature->push_descriptor_mask = 0; -@@ -1281,14 +1305,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign +@@ -1281,14 +1306,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign root_signature->push_descriptor_mask |= 1u << i; @@ -14585,7 +19649,7 @@ index 2b0f81d3812..32f34479ea1 100644 } return S_OK; -@@ -1298,10 +1327,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa +@@ -1298,10 +1328,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { @@ -14605,7 +19669,7 @@ index 2b0f81d3812..32f34479ea1 100644 for (i = 0; i < desc->NumStaticSamplers; ++i) { const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; -@@ -1309,16 +1347,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa +@@ -1309,16 +1348,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) return hr; @@ -14624,7 +19688,7 @@ index 2b0f81d3812..32f34479ea1 100644 return S_OK; } -@@ -1450,29 +1485,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, +@@ -1450,29 +1486,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, return S_OK; } @@ -14685,7 +19749,7 @@ index 2b0f81d3812..32f34479ea1 100644 } return S_OK; -@@ -1518,7 +1576,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa +@@ -1518,7 +1577,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa HRESULT hr; memset(&context, 0, sizeof(context)); @@ -14693,7 +19757,7 @@ index 2b0f81d3812..32f34479ea1 100644 root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; root_signature->refcount = 1; -@@ -1580,17 +1637,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa +@@ -1580,17 +1638,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa sizeof(*root_signature->static_samplers)))) goto fail; diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch similarity index 99% rename from patches/vkd3d-latest/0006-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch rename to patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch index 0a7b08ce..667bef7e 100644 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a082daeb56c239b41d67b5df5abceb342c0.patch @@ -1,4 +1,4 @@ -From 150e550f9e45fc08850ee21cc7c954e60594858c Mon Sep 17 00:00:00 2001 +From c3d5c3b03aa6a746797f6e1debf17f0978ed68c0 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sat, 18 Jan 2025 16:15:28 +1100 Subject: [PATCH] Updated vkd3d to a082daeb56c239b41d67b5df5abceb342c0b32b9. 
diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-d164752efc2dffc06d1e3ad9dd04cafff31.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-d164752efc2dffc06d1e3ad9dd04cafff31.patch deleted file mode 100644 index e56c5473..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-d164752efc2dffc06d1e3ad9dd04cafff31.patch +++ /dev/null @@ -1,1516 +0,0 @@ -From 4cfee2377ba2cf2eefea8abbb53596fca5dd6f11 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 19 Dec 2024 06:57:42 +1100 -Subject: [PATCH] Updated vkd3d to d164752efc2dffc06d1e3ad9dd04cafff31e742a. - ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 301 ++++-------------- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 234 +++++++++----- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 51 +++ - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 4 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 86 +++++ - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 158 ++++++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 105 +++++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 46 ++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 2 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 41 ++- - 10 files changed, 629 insertions(+), 399 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 8c96befadea..69e14e0c7bf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -354,6 +354,64 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_XOR ] = "xor", - }; - -+static const char * const shader_register_names[] = -+{ -+ [VKD3DSPR_ADDR ] = "a", -+ [VKD3DSPR_ATTROUT ] = "oD", -+ [VKD3DSPR_COLOROUT ] = "oC", -+ [VKD3DSPR_COMBINED_SAMPLER ] = "s", -+ [VKD3DSPR_CONST ] = "c", -+ [VKD3DSPR_CONSTBOOL ] = "b", -+ [VKD3DSPR_CONSTBUFFER ] = "cb", -+ [VKD3DSPR_CONSTINT ] = "i", -+ [VKD3DSPR_COVERAGE ] = "vCoverage", -+ [VKD3DSPR_DEPTHOUT ] = "oDepth", -+ [VKD3DSPR_DEPTHOUTGE ] = "oDepthGE", -+ [VKD3DSPR_DEPTHOUTLE ] = "oDepthLE", -+ [VKD3DSPR_FORKINSTID ] = "vForkInstanceId", -+ [VKD3DSPR_FUNCTIONBODY ] = "fb", -+ [VKD3DSPR_FUNCTIONPOINTER ] = "fp", -+ [VKD3DSPR_GROUPSHAREDMEM ] = "g", -+ [VKD3DSPR_GSINSTID ] = "vGSInstanceID", -+ [VKD3DSPR_IDXTEMP ] = "x", -+ [VKD3DSPR_IMMCONST ] = "l", -+ [VKD3DSPR_IMMCONST64 ] = "d", -+ [VKD3DSPR_IMMCONSTBUFFER ] = "icb", -+ [VKD3DSPR_INCONTROLPOINT ] = "vicp", -+ [VKD3DSPR_INPUT ] = "v", -+ [VKD3DSPR_JOININSTID ] = "vJoinInstanceId", -+ [VKD3DSPR_LABEL ] = "l", -+ [VKD3DSPR_LOCALTHREADID ] = "vThreadIDInGroup", -+ [VKD3DSPR_LOCALTHREADINDEX ] = "vThreadIDInGroupFlattened", -+ [VKD3DSPR_LOOP ] = "aL", -+ [VKD3DSPR_NULL ] = "null", -+ [VKD3DSPR_OUTCONTROLPOINT ] = "vocp", -+ [VKD3DSPR_OUTPOINTID ] = "vOutputControlPointID", -+ [VKD3DSPR_OUTPUT ] = "o", -+ [VKD3DSPR_OUTSTENCILREF ] = "oStencilRef", -+ [VKD3DSPR_PARAMETER ] = "parameter", -+ [VKD3DSPR_PATCHCONST ] = "vpc", -+ [VKD3DSPR_POINT_COORD ] = "vPointCoord", -+ [VKD3DSPR_PREDICATE ] = "p", -+ [VKD3DSPR_PRIMID ] = "primID", -+ [VKD3DSPR_RASTERIZER ] = "rasterizer", -+ [VKD3DSPR_RESOURCE ] = "t", -+ [VKD3DSPR_SAMPLEMASK ] = "oMask", -+ [VKD3DSPR_SAMPLER ] = "s", -+ [VKD3DSPR_SSA ] = "sr", -+ [VKD3DSPR_STREAM ] = "m", -+ [VKD3DSPR_TEMP ] = "r", -+ [VKD3DSPR_TESSCOORD ] = "vDomainLocation", -+ [VKD3DSPR_TEXCRDOUT ] = "oT", -+ [VKD3DSPR_TEXTURE ] = "t", -+ [VKD3DSPR_THREADGROUPID ] = "vThreadGroupID", -+ [VKD3DSPR_THREADID ] = "vThreadID", -+ [VKD3DSPR_UAV ] = "u", -+ [VKD3DSPR_UNDEF ] = "undef", -+ [VKD3DSPR_WAVELANECOUNT ] = "vWaveLaneCount", -+ [VKD3DSPR_WAVELANEINDEX ] = "vWaveLaneIndex", -+}; -+ - 
struct vkd3d_d3d_asm_colours - { - const char *reset; -@@ -377,22 +435,6 @@ struct vkd3d_d3d_asm_compiler - const struct vkd3d_shader_instruction *current; - }; - --/* Convert floating point offset relative to a register file to an absolute -- * offset for float constants. */ --static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) --{ -- switch (register_type) -- { -- case VKD3DSPR_CONST: return register_idx; -- case VKD3DSPR_CONST2: return 2048 + register_idx; -- case VKD3DSPR_CONST3: return 4096 + register_idx; -- case VKD3DSPR_CONST4: return 6144 + register_idx; -- default: -- FIXME("Unsupported register type: %u.\n", register_type); -- return register_idx; -- } --} -- - static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) - { - unsigned int i; -@@ -966,82 +1008,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); - switch (reg->type) - { -- case VKD3DSPR_TEMP: -- vkd3d_string_buffer_printf(buffer, "r"); -- break; -- -- case VKD3DSPR_INPUT: -- vkd3d_string_buffer_printf(buffer, "v"); -- break; -- -- case VKD3DSPR_CONST: -- case VKD3DSPR_CONST2: -- case VKD3DSPR_CONST3: -- case VKD3DSPR_CONST4: -- vkd3d_string_buffer_printf(buffer, "c"); -- offset = shader_get_float_offset(reg->type, offset); -- break; -- -- case VKD3DSPR_TEXTURE: /* vs: case VKD3DSPR_ADDR */ -- vkd3d_string_buffer_printf(buffer, "%c", -- compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? 't' : 'a'); -- break; -- - case VKD3DSPR_RASTOUT: - vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); - break; - -- case VKD3DSPR_COLOROUT: -- vkd3d_string_buffer_printf(buffer, "oC"); -- break; -- -- case VKD3DSPR_DEPTHOUT: -- vkd3d_string_buffer_printf(buffer, "oDepth"); -- break; -- -- case VKD3DSPR_DEPTHOUTGE: -- vkd3d_string_buffer_printf(buffer, "oDepthGE"); -- break; -- -- case VKD3DSPR_DEPTHOUTLE: -- vkd3d_string_buffer_printf(buffer, "oDepthLE"); -- break; -- -- case VKD3DSPR_ATTROUT: -- vkd3d_string_buffer_printf(buffer, "oD"); -- break; -- -- case VKD3DSPR_TEXCRDOUT: -- /* Vertex shaders >= 3.0 use general purpose output registers -- * (VKD3DSPR_OUTPUT), which can include an address token. 
*/ -- if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) -- vkd3d_string_buffer_printf(buffer, "o"); -- else -- vkd3d_string_buffer_printf(buffer, "oT"); -- break; -- -- case VKD3DSPR_CONSTINT: -- vkd3d_string_buffer_printf(buffer, "i"); -- break; -- -- case VKD3DSPR_CONSTBOOL: -- vkd3d_string_buffer_printf(buffer, "b"); -- break; -- -- case VKD3DSPR_LABEL: -- vkd3d_string_buffer_printf(buffer, "l"); -- break; -- -- case VKD3DSPR_LOOP: -- vkd3d_string_buffer_printf(buffer, "aL"); -- break; -- -- case VKD3DSPR_COMBINED_SAMPLER: -- case VKD3DSPR_SAMPLER: -- vkd3d_string_buffer_printf(buffer, "s"); -- is_descriptor = true; -- break; -- - case VKD3DSPR_MISCTYPE: - if (offset > 1) - vkd3d_string_buffer_printf(buffer, "%s%s", -@@ -1050,156 +1020,20 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); - break; - -- case VKD3DSPR_PREDICATE: -- vkd3d_string_buffer_printf(buffer, "p"); -- break; -- -- case VKD3DSPR_IMMCONST: -- vkd3d_string_buffer_printf(buffer, "l"); -- break; -- -- case VKD3DSPR_IMMCONST64: -- vkd3d_string_buffer_printf(buffer, "d"); -- break; -- -+ case VKD3DSPR_COMBINED_SAMPLER: -+ case VKD3DSPR_SAMPLER: - case VKD3DSPR_CONSTBUFFER: -- vkd3d_string_buffer_printf(buffer, "cb"); -- is_descriptor = true; -- break; -- -- case VKD3DSPR_IMMCONSTBUFFER: -- vkd3d_string_buffer_printf(buffer, "icb"); -- break; -- -- case VKD3DSPR_PRIMID: -- vkd3d_string_buffer_printf(buffer, "primID"); -- break; -- -- case VKD3DSPR_NULL: -- vkd3d_string_buffer_printf(buffer, "null"); -- break; -- -- case VKD3DSPR_RASTERIZER: -- vkd3d_string_buffer_printf(buffer, "rasterizer"); -- break; -- - case VKD3DSPR_RESOURCE: -- vkd3d_string_buffer_printf(buffer, "t"); -- is_descriptor = true; -- break; -- - case VKD3DSPR_UAV: -- vkd3d_string_buffer_printf(buffer, "u"); - is_descriptor = true; -- break; -- -- case VKD3DSPR_OUTPOINTID: -- vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); -- break; -- -- case VKD3DSPR_FORKINSTID: -- vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); -- break; -- -- case VKD3DSPR_JOININSTID: -- vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); -- break; -- -- case VKD3DSPR_INCONTROLPOINT: -- vkd3d_string_buffer_printf(buffer, "vicp"); -- break; -- -- case VKD3DSPR_OUTCONTROLPOINT: -- vkd3d_string_buffer_printf(buffer, "vocp"); -- break; -- -- case VKD3DSPR_PATCHCONST: -- vkd3d_string_buffer_printf(buffer, "vpc"); -- break; -- -- case VKD3DSPR_TESSCOORD: -- vkd3d_string_buffer_printf(buffer, "vDomainLocation"); -- break; -- -- case VKD3DSPR_GROUPSHAREDMEM: -- vkd3d_string_buffer_printf(buffer, "g"); -- break; -- -- case VKD3DSPR_THREADID: -- vkd3d_string_buffer_printf(buffer, "vThreadID"); -- break; -- -- case VKD3DSPR_THREADGROUPID: -- vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); -- break; -- -- case VKD3DSPR_LOCALTHREADID: -- vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); -- break; -- -- case VKD3DSPR_LOCALTHREADINDEX: -- vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); -- break; -- -- case VKD3DSPR_IDXTEMP: -- vkd3d_string_buffer_printf(buffer, "x"); -- break; -- -- case VKD3DSPR_STREAM: -- vkd3d_string_buffer_printf(buffer, "m"); -- break; -- -- case VKD3DSPR_FUNCTIONBODY: -- vkd3d_string_buffer_printf(buffer, "fb"); -- break; -- -- case VKD3DSPR_FUNCTIONPOINTER: -- vkd3d_string_buffer_printf(buffer, "fp"); -- break; -- -- case VKD3DSPR_COVERAGE: -- vkd3d_string_buffer_printf(buffer, "vCoverage"); -- break; -- -- case 
VKD3DSPR_SAMPLEMASK: -- vkd3d_string_buffer_printf(buffer, "oMask"); -- break; -- -- case VKD3DSPR_GSINSTID: -- vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); -- break; -- -- case VKD3DSPR_OUTSTENCILREF: -- vkd3d_string_buffer_printf(buffer, "oStencilRef"); -- break; -- -- case VKD3DSPR_UNDEF: -- vkd3d_string_buffer_printf(buffer, "undef"); -- break; -- -- case VKD3DSPR_SSA: -- vkd3d_string_buffer_printf(buffer, "sr"); -- break; -- -- case VKD3DSPR_WAVELANECOUNT: -- vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); -- break; -- -- case VKD3DSPR_WAVELANEINDEX: -- vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); -- break; -- -- case VKD3DSPR_PARAMETER: -- vkd3d_string_buffer_printf(buffer, "parameter"); -- break; -- -- case VKD3DSPR_POINT_COORD: -- vkd3d_string_buffer_printf(buffer, "vPointCoord"); -- break; -+ /* fall through */ - - default: -- vkd3d_string_buffer_printf(buffer, "%s%s", -- compiler->colours.error, reg->type, compiler->colours.reset); -+ if (reg->type < ARRAY_SIZE(shader_register_names) && shader_register_names[reg->type]) -+ vkd3d_string_buffer_printf(buffer, "%s", shader_register_names[reg->type]); -+ else -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->type, compiler->colours.reset); - break; - } - -@@ -2132,8 +1966,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - - case VKD3DSIH_DEF: - vkd3d_string_buffer_printf(buffer, " %sc%u%s", compiler->colours.reg, -- shader_get_float_offset(ins->dst[0].reg.type, ins->dst[0].reg.idx[0].offset), -- compiler->colours.reset); -+ ins->dst[0].reg.idx[0].offset, compiler->colours.reset); - shader_print_float_literal(compiler, " = ", ins->src[0].reg.u.immconst_f32[0], ""); - shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[1], ""); - shader_print_float_literal(compiler, ", ", ins->src[0].reg.u.immconst_f32[2], ""); -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index a931883e8d1..58e35cf22e8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -89,6 +89,32 @@ - #define VKD3D_SM1_VERSION_MAJOR(version) (((version) >> 8u) & 0xffu) - #define VKD3D_SM1_VERSION_MINOR(version) (((version) >> 0u) & 0xffu) - -+enum vkd3d_sm1_register_type -+{ -+ VKD3D_SM1_REG_TEMP = 0x00, -+ VKD3D_SM1_REG_INPUT = 0x01, -+ VKD3D_SM1_REG_CONST = 0x02, -+ VKD3D_SM1_REG_ADDR = 0x03, -+ VKD3D_SM1_REG_TEXTURE = 0x03, -+ VKD3D_SM1_REG_RASTOUT = 0x04, -+ VKD3D_SM1_REG_ATTROUT = 0x05, -+ VKD3D_SM1_REG_TEXCRDOUT = 0x06, -+ VKD3D_SM1_REG_OUTPUT = 0x06, -+ VKD3D_SM1_REG_CONSTINT = 0x07, -+ VKD3D_SM1_REG_COLOROUT = 0x08, -+ VKD3D_SM1_REG_DEPTHOUT = 0x09, -+ VKD3D_SM1_REG_SAMPLER = 0x0a, -+ VKD3D_SM1_REG_CONST2 = 0x0b, -+ VKD3D_SM1_REG_CONST3 = 0x0c, -+ VKD3D_SM1_REG_CONST4 = 0x0d, -+ VKD3D_SM1_REG_CONSTBOOL = 0x0e, -+ VKD3D_SM1_REG_LOOP = 0x0f, -+ VKD3D_SM1_REG_TEMPFLOAT16 = 0x10, -+ VKD3D_SM1_REG_MISCTYPE = 0x11, -+ VKD3D_SM1_REG_LABEL = 0x12, -+ VKD3D_SM1_REG_PREDICATE = 0x13, -+}; -+ - enum vkd3d_sm1_address_mode_type - { - VKD3D_SM1_ADDRESS_MODE_ABSOLUTE = 0x0, -@@ -388,6 +414,34 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = - {0, 0, 0, VKD3DSIH_INVALID}, - }; - -+static const struct -+{ -+ enum vkd3d_sm1_register_type d3dbc_type; -+ enum vkd3d_shader_register_type vsir_type; -+} -+register_types[] = -+{ -+ {VKD3D_SM1_REG_TEMP, VKD3DSPR_TEMP}, -+ {VKD3D_SM1_REG_INPUT, VKD3DSPR_INPUT}, -+ {VKD3D_SM1_REG_CONST, VKD3DSPR_CONST}, -+ {VKD3D_SM1_REG_ADDR, 
VKD3DSPR_ADDR}, -+ {VKD3D_SM1_REG_TEXTURE, VKD3DSPR_TEXTURE}, -+ {VKD3D_SM1_REG_RASTOUT, VKD3DSPR_RASTOUT}, -+ {VKD3D_SM1_REG_ATTROUT, VKD3DSPR_ATTROUT}, -+ {VKD3D_SM1_REG_OUTPUT, VKD3DSPR_OUTPUT}, -+ {VKD3D_SM1_REG_TEXCRDOUT, VKD3DSPR_TEXCRDOUT}, -+ {VKD3D_SM1_REG_CONSTINT, VKD3DSPR_CONSTINT}, -+ {VKD3D_SM1_REG_COLOROUT, VKD3DSPR_COLOROUT}, -+ {VKD3D_SM1_REG_DEPTHOUT, VKD3DSPR_DEPTHOUT}, -+ {VKD3D_SM1_REG_SAMPLER, VKD3DSPR_COMBINED_SAMPLER}, -+ {VKD3D_SM1_REG_CONSTBOOL, VKD3DSPR_CONSTBOOL}, -+ {VKD3D_SM1_REG_LOOP, VKD3DSPR_LOOP}, -+ {VKD3D_SM1_REG_TEMPFLOAT16, VKD3DSPR_TEMPFLOAT16}, -+ {VKD3D_SM1_REG_MISCTYPE, VKD3DSPR_MISCTYPE}, -+ {VKD3D_SM1_REG_LABEL, VKD3DSPR_LABEL}, -+ {VKD3D_SM1_REG_PREDICATE, VKD3DSPR_PREDICATE}, -+}; -+ - static const enum vkd3d_shader_resource_type resource_type_table[] = - { - /* VKD3D_SM1_RESOURCE_UNKNOWN */ VKD3D_SHADER_RESOURCE_NONE, -@@ -453,6 +507,7 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_ - switch (reg_type) - { - case VKD3DSPR_DEPTHOUT: -+ case VKD3DSPR_ADDR: - return 0; - - default: -@@ -460,52 +515,82 @@ static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_ - } - } - --static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, -- struct vkd3d_shader_src_param *src) -+static enum vkd3d_shader_register_type parse_register_type( -+ struct vkd3d_shader_sm1_parser *sm1, uint32_t param, unsigned int *index_offset) - { -- enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) -+ enum vkd3d_sm1_register_type d3dbc_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) - | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); -- unsigned int idx_count = idx_count_from_reg_type(reg_type); - -- vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); -- src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -- src->reg.non_uniform = false; -- if (idx_count == 1) -+ *index_offset = 0; -+ -+ if (d3dbc_type == VKD3D_SM1_REG_CONST2) - { -- src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; -- src->reg.idx[0].rel_addr = rel_addr; -+ *index_offset = 2048; -+ return VKD3DSPR_CONST; - } -- if (src->reg.type == VKD3DSPR_SAMPLER) -- src->reg.dimension = VSIR_DIMENSION_NONE; -- else if (src->reg.type == VKD3DSPR_DEPTHOUT) -- src->reg.dimension = VSIR_DIMENSION_SCALAR; -- else -- src->reg.dimension = VSIR_DIMENSION_VEC4; -- src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); -- src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; -+ -+ if (d3dbc_type == VKD3D_SM1_REG_CONST3) -+ { -+ *index_offset = 4096; -+ return VKD3DSPR_CONST; -+ } -+ -+ if (d3dbc_type == VKD3D_SM1_REG_CONST4) -+ { -+ *index_offset = 6144; -+ return VKD3DSPR_CONST; -+ } -+ -+ if (d3dbc_type == VKD3D_SM1_REG_ADDR) -+ return sm1->p.program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ? VKD3DSPR_TEXTURE : VKD3DSPR_ADDR; -+ if (d3dbc_type == VKD3D_SM1_REG_TEXCRDOUT) -+ return vkd3d_shader_ver_ge(&sm1->p.program->shader_version, 3, 0) ? 
VKD3DSPR_OUTPUT : VKD3DSPR_TEXCRDOUT; -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) -+ { -+ if (register_types[i].d3dbc_type == d3dbc_type) -+ return register_types[i].vsir_type; -+ } -+ -+ return VKD3DSPR_INVALID; - } - --static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, -- struct vkd3d_shader_dst_param *dst) -+static void d3dbc_parse_register(struct vkd3d_shader_sm1_parser *d3dbc, -+ struct vkd3d_shader_register *reg, uint32_t param, struct vkd3d_shader_src_param *rel_addr) - { -- enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) -- | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); -- unsigned int idx_count = idx_count_from_reg_type(reg_type); -- -- vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); -- dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -- dst->reg.non_uniform = false; -+ enum vkd3d_shader_register_type reg_type; -+ unsigned int index_offset, idx_count; -+ -+ reg_type = parse_register_type(d3dbc, param, &index_offset); -+ idx_count = idx_count_from_reg_type(reg_type); -+ vsir_register_init(reg, reg_type, VKD3D_DATA_FLOAT, idx_count); -+ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -+ reg->non_uniform = false; - if (idx_count == 1) - { -- dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; -- dst->reg.idx[0].rel_addr = rel_addr; -+ reg->idx[0].offset = index_offset + (param & VKD3D_SM1_REGISTER_NUMBER_MASK); -+ reg->idx[0].rel_addr = rel_addr; - } -- if (dst->reg.type == VKD3DSPR_SAMPLER) -- dst->reg.dimension = VSIR_DIMENSION_NONE; -- else if (dst->reg.type == VKD3DSPR_DEPTHOUT) -- dst->reg.dimension = VSIR_DIMENSION_SCALAR; -+ if (reg->type == VKD3DSPR_SAMPLER) -+ reg->dimension = VSIR_DIMENSION_NONE; -+ else if (reg->type == VKD3DSPR_DEPTHOUT) -+ reg->dimension = VSIR_DIMENSION_SCALAR; - else -- dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+} -+ -+static void shader_sm1_parse_src_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param, -+ struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_src_param *src) -+{ -+ d3dbc_parse_register(sm1, &src->reg, param, rel_addr); -+ src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); -+ src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; -+} -+ -+static void shader_sm1_parse_dst_param(struct vkd3d_shader_sm1_parser *sm1, uint32_t param, -+ struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_dst_param *dst) -+{ -+ d3dbc_parse_register(sm1, &dst->reg, param, rel_addr); - dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; - dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; - -@@ -686,26 +771,18 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * - VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); - - case VKD3DSPR_TEXTURE: -- /* For vertex shaders, this is ADDR. 
*/ -- if (version->type == VKD3D_SHADER_TYPE_VERTEX) -- return true; - return add_signature_element(sm1, false, "TEXCOORD", register_index, - VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); - -+ case VKD3DSPR_TEXCRDOUT: -+ return add_signature_element(sm1, true, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ - case VKD3DSPR_OUTPUT: - if (version->type == VKD3D_SHADER_TYPE_VERTEX) - { -- /* For sm < 2 vertex shaders, this is TEXCRDOUT. -- * -- * For sm3 vertex shaders, this is OUTPUT, but we already -- * should have had a DCL instruction. */ -- if (version->major == 3) -- { -- add_signature_mask(sm1, true, register_index, mask); -- return true; -- } -- return add_signature_element(sm1, true, "TEXCOORD", register_index, -- VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ add_signature_mask(sm1, true, register_index, mask); -+ return true; - } - /* fall through */ - -@@ -842,18 +919,6 @@ static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, - record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); - break; - -- case VKD3DSPR_CONST2: -- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); -- break; -- -- case VKD3DSPR_CONST3: -- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); -- break; -- -- case VKD3DSPR_CONST4: -- record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); -- break; -- - case VKD3DSPR_CONSTINT: - record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); - break; -@@ -961,9 +1026,9 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const - sm1->abort = true; - return; - } -- shader_sm1_parse_src_param(addr_token, NULL, src_rel_addr); -+ shader_sm1_parse_src_param(sm1, addr_token, NULL, src_rel_addr); - } -- shader_sm1_parse_src_param(token, src_rel_addr, src_param); -+ shader_sm1_parse_src_param(sm1, token, src_rel_addr, src_param); - } - - static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, -@@ -982,9 +1047,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const - sm1->abort = true; - return; - } -- shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); -+ shader_sm1_parse_src_param(sm1, addr_token, NULL, dst_rel_addr); - } -- shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); -+ shader_sm1_parse_dst_param(sm1, token, dst_rel_addr, dst_param); - - if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) - sm1->p.program->has_point_size = true; -@@ -1027,7 +1092,7 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, - semantic->resource_data_type[1] = VKD3D_DATA_FLOAT; - semantic->resource_data_type[2] = VKD3D_DATA_FLOAT; - semantic->resource_data_type[3] = VKD3D_DATA_FLOAT; -- shader_sm1_parse_dst_param(dst_token, NULL, &semantic->resource.reg); -+ shader_sm1_parse_dst_param(sm1, dst_token, NULL, &semantic->resource.reg); - range = &semantic->resource.range; - range->space = 0; - range->first = range->last = semantic->resource.reg.reg.idx[0].offset; -@@ -1621,10 +1686,33 @@ static void d3dbc_write_comment(struct d3dbc_compiler *d3dbc, - set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); - } - --static uint32_t 
sm1_encode_register_type(enum vkd3d_shader_register_type type) -+static enum vkd3d_sm1_register_type d3dbc_register_type_from_vsir(const struct vkd3d_shader_register *reg) -+{ -+ if (reg->type == VKD3DSPR_CONST) -+ { -+ if (reg->idx[0].offset >= 6144) -+ return VKD3D_SM1_REG_CONST4; -+ if (reg->idx[0].offset >= 4096) -+ return VKD3D_SM1_REG_CONST3; -+ if (reg->idx[0].offset >= 2048) -+ return VKD3D_SM1_REG_CONST2; -+ } -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE(register_types); ++i) -+ { -+ if (register_types[i].vsir_type == reg->type) -+ return register_types[i].d3dbc_type; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static uint32_t sm1_encode_register_type(const struct vkd3d_shader_register *reg) - { -- return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) -- | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); -+ enum vkd3d_sm1_register_type sm1_type = d3dbc_register_type_from_vsir(reg); -+ -+ return ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) -+ | ((sm1_type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); - } - - static uint32_t swizzle_from_vsir(uint32_t swizzle) -@@ -1673,17 +1761,19 @@ static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const s - { - VKD3D_ASSERT(reg->write_mask); - put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER -- | sm1_encode_register_type(reg->reg.type) -+ | sm1_encode_register_type(®->reg) - | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) -- | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg.idx[0].offset); -+ | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) -+ | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); - } - - static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) - { - put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER -- | sm1_encode_register_type(reg->reg.type) -+ | sm1_encode_register_type(®->reg) - | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) -- | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg.idx[0].offset); -+ | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) -+ | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); - } - - static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 4493602dfb7..399c2b67eae 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -430,6 +430,8 @@ enum dx_intrinsic_opcode - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, - DX_DERIV_FINEY = 86, -+ DX_EVAL_SAMPLE_INDEX = 88, -+ DX_EVAL_CENTROID = 89, - DX_SAMPLE_INDEX = 90, - DX_COVERAGE = 91, - DX_THREAD_ID = 93, -@@ -5098,6 +5100,53 @@ static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opc - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_eval_attrib(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ const struct shader_signature *signature; -+ unsigned int row_index, column_index; -+ const struct signature_element *e; -+ -+ row_index = sm6_value_get_constant_uint(operands[0]); -+ column_index = sm6_value_get_constant_uint(operands[2]); -+ -+ signature = 
&sm6->p.program->input_signature; -+ if (row_index >= signature->element_count) -+ { -+ WARN("Invalid row index %u.\n", row_index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid input row index %u for an attribute evaluation.", row_index); -+ return; -+ } -+ -+ e = &signature->elements[row_index]; -+ if (column_index >= VKD3D_VEC4_SIZE || !(e->mask & (1 << column_index))) -+ { -+ WARN("Invalid column index %u.\n", column_index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid input column index %u for an attribute evaluation.", column_index); -+ return; -+ } -+ -+ vsir_instruction_init(ins, &sm6->p.location, (op == DX_EVAL_CENTROID) -+ ? VKD3DSIH_EVAL_CENTROID : VKD3DSIH_EVAL_SAMPLE_INDEX); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 1 + (op == DX_EVAL_SAMPLE_INDEX), sm6))) -+ return; -+ -+ src_params[0].reg = sm6->input_params[row_index].reg; -+ src_param_init_scalar(&src_params[0], column_index); -+ if (e->register_count > 1) -+ register_index_address_init(&src_params[0].reg.idx[0], operands[1], sm6); -+ -+ if (op == DX_EVAL_SAMPLE_INDEX) -+ src_param_init_from_value(&src_params[1], operands[3]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -6288,6 +6337,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, - [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, - [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, -+ [DX_EVAL_CENTROID ] = {"o", "cic", sm6_parser_emit_dx_eval_attrib}, -+ [DX_EVAL_SAMPLE_INDEX ] = {"o", "cici", sm6_parser_emit_dx_eval_attrib}, - [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, - [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 7c9547a1c01..2acc003c9a1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -861,6 +861,10 @@ enum hlsl_resource_load_type - HLSL_RESOURCE_GATHER_GREEN, - HLSL_RESOURCE_GATHER_BLUE, - HLSL_RESOURCE_GATHER_ALPHA, -+ HLSL_RESOURCE_GATHER_CMP_RED, -+ HLSL_RESOURCE_GATHER_CMP_GREEN, -+ HLSL_RESOURCE_GATHER_CMP_BLUE, -+ HLSL_RESOURCE_GATHER_CMP_ALPHA, - HLSL_RESOURCE_SAMPLE_INFO, - HLSL_RESOURCE_RESINFO, - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index a3814a810b5..dbed11cd8b3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -6124,6 +6124,87 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return true; - } - -+static bool add_gather_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ struct hlsl_resource_load_params load_params = {0}; -+ unsigned int sampler_dim, offset_dim; -+ const struct hlsl_type *sampler_type; -+ struct hlsl_ir_node *load; -+ -+ sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ -+ if (!strcmp(name, "GatherCmpGreen")) -+ 
load_params.type = HLSL_RESOURCE_GATHER_CMP_GREEN; -+ else if (!strcmp(name, "GatherCmpBlue")) -+ load_params.type = HLSL_RESOURCE_GATHER_CMP_BLUE; -+ else if (!strcmp(name, "GatherCmpAlpha")) -+ load_params.type = HLSL_RESOURCE_GATHER_CMP_ALPHA; -+ else -+ load_params.type = HLSL_RESOURCE_GATHER_CMP_RED; -+ -+ if (!strcmp(name, "GatherCmp") || !offset_dim) -+ { -+ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", -+ name, 4 + !!offset_dim, params->args_count); -+ return false; -+ } -+ } -+ else if (params->args_count < 3 || params->args_count == 6 || params->args_count > 8) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected 3, 4, 5, 7, or 8, but got %u.", -+ name, params->args_count); -+ return false; -+ } -+ -+ if (params->args_count == 5 || params->args_count == 8) -+ { -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ } -+ else if (offset_dim && params->args_count > 3) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -+ return false; -+ } -+ -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", -+ name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ return false; -+ -+ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -+ return false; -+ -+ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ - static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, - struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) - { -@@ -6491,6 +6572,11 @@ texture_methods[] = - { "Gather", add_gather_method_call, "00010101001000" }, - { "GatherAlpha", add_gather_method_call, "00010101001000" }, - { "GatherBlue", add_gather_method_call, "00010101001000" }, -+ { "GatherCmp", add_gather_cmp_method_call, "00010101001000" }, -+ { "GatherCmpAlpha", add_gather_cmp_method_call, "00010101001000" }, -+ { "GatherCmpBlue", add_gather_cmp_method_call, "00010101001000" }, -+ { "GatherCmpGreen", add_gather_cmp_method_call, "00010101001000" }, -+ { "GatherCmpRed", add_gather_cmp_method_call, "00010101001000" }, - { "GatherGreen", add_gather_method_call, "00010101001000" }, - { "GatherRed", add_gather_method_call, "00010101001000" }, - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index a43ea53089e..08f139f5e8f 
100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -2949,6 +2949,10 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - case HLSL_RESOURCE_GATHER_GREEN: - case HLSL_RESOURCE_GATHER_BLUE: - case HLSL_RESOURCE_GATHER_ALPHA: -+ case HLSL_RESOURCE_GATHER_CMP_RED: -+ case HLSL_RESOURCE_GATHER_CMP_GREEN: -+ case HLSL_RESOURCE_GATHER_CMP_BLUE: -+ case HLSL_RESOURCE_GATHER_CMP_ALPHA: - case HLSL_RESOURCE_RESINFO: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: -@@ -3537,6 +3541,51 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return true; - } - -+static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *swizzle, *store; -+ struct hlsl_ir_resource_load *load; -+ struct hlsl_ir_load *tmp_load; -+ struct hlsl_ir_var *tmp_var; -+ struct hlsl_deref deref; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ load = hlsl_ir_resource_load(instr); -+ if (load->load_type != HLSL_RESOURCE_SAMPLE_LOD -+ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) -+ return false; -+ -+ if (!load->lod.node) -+ return false; -+ -+ if (!(tmp_var = hlsl_new_synthetic_var(ctx, "coords-with-lod", -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &instr->loc))) -+ return false; -+ -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc))) -+ return false; -+ list_add_before(&instr->entry, &swizzle->entry); -+ -+ if (!(store = hlsl_new_simple_store(ctx, tmp_var, swizzle))) -+ return false; -+ list_add_before(&instr->entry, &store->entry); -+ -+ hlsl_init_simple_deref_from_var(&deref, tmp_var); -+ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load->coords.node, 0, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &store->entry); -+ -+ if (!(tmp_load = hlsl_new_var_load(ctx, tmp_var, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &tmp_load->node.entry); -+ -+ hlsl_src_remove(&load->coords); -+ hlsl_src_from_node(&load->coords, &tmp_load->node); -+ hlsl_src_remove(&load->lod); -+ return true; -+} -+ - static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_block *block) - { -@@ -9596,18 +9645,18 @@ static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, - } - - static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, -- const struct hlsl_ir_resource_load *load, uint32_t swizzle) -+ const struct hlsl_ir_resource_load *load, uint32_t swizzle, bool compare) - { - const struct vkd3d_shader_version *version = &program->shader_version; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - const struct hlsl_deref *resource = &load->resource; -+ enum vkd3d_shader_opcode opcode = VKD3DSIH_GATHER4; - const struct hlsl_deref *sampler = &load->sampler; - const struct hlsl_ir_node *instr = &load->node; -+ unsigned int src_count = 3, current_arg = 0; - struct vkd3d_shader_instruction *ins; -- enum vkd3d_shader_opcode opcode; - -- opcode = VKD3DSIH_GATHER4; - if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) - { - if (!vkd3d_shader_ver_ge(version, 5, 0)) -@@ -9617,50 +9666,40 @@ static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_pro - return false; - } - opcode = VKD3DSIH_GATHER4_PO; -+ 
++src_count; - } - -- if (opcode == VKD3DSIH_GATHER4) -+ if (compare) - { -- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) -- return false; -+ opcode = opcode == VKD3DSIH_GATHER4 ? VKD3DSIH_GATHER4_C : VKD3DSIH_GATHER4_PO_C; -+ ++src_count; -+ } - -- vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -- vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -- sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) -+ return false; - -- if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -- &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) -- return false; -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, coords, VKD3DSP_WRITEMASK_ALL); - -- if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -- &ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -- return false; -- ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[2].swizzle = swizzle; -- } -- else if (opcode == VKD3DSIH_GATHER4_PO) -- { -- if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) -- return false; -+ if (opcode == VKD3DSIH_GATHER4_PO || opcode == VKD3DSIH_GATHER4_PO_C) -+ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); -+ else -+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); - -- vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -- vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -- vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[current_arg++], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; - -- if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -- &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) -- return false; -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[current_arg], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -+ return false; -+ ins->src[current_arg].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[current_arg].swizzle = swizzle; -+ current_arg++; -+ -+ if (compare) -+ vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, load->cmp.node, VKD3DSP_WRITEMASK_0); - -- if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -- &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -- return false; -- ins->src[3].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[3].swizzle = swizzle; -- } -- else -- { -- vkd3d_unreachable(); -- } - return true; - } - -@@ -9723,6 +9762,32 @@ static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, - return true; - } - -+static uint32_t get_gather_swizzle(enum hlsl_resource_load_type type) -+{ -+ switch (type) -+ { -+ case HLSL_RESOURCE_GATHER_RED: -+ case HLSL_RESOURCE_GATHER_CMP_RED: -+ return VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ -+ case HLSL_RESOURCE_GATHER_GREEN: -+ case HLSL_RESOURCE_GATHER_CMP_GREEN: -+ return VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y); -+ -+ case HLSL_RESOURCE_GATHER_BLUE: -+ case HLSL_RESOURCE_GATHER_CMP_BLUE: -+ return VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); -+ -+ case HLSL_RESOURCE_GATHER_ALPHA: -+ case HLSL_RESOURCE_GATHER_CMP_ALPHA: -+ return VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ default: -+ return 0; -+ } -+ -+ return 0; -+} -+ - static bool 
sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - struct vsir_program *program, const struct hlsl_ir_resource_load *load) - { -@@ -9754,16 +9819,16 @@ static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - return sm4_generate_vsir_instr_sample(ctx, program, load); - - case HLSL_RESOURCE_GATHER_RED: -- return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); -- - case HLSL_RESOURCE_GATHER_GREEN: -- return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); -- - case HLSL_RESOURCE_GATHER_BLUE: -- return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); -- - case HLSL_RESOURCE_GATHER_ALPHA: -- return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); -+ return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), false); -+ -+ case HLSL_RESOURCE_GATHER_CMP_RED: -+ case HLSL_RESOURCE_GATHER_CMP_GREEN: -+ case HLSL_RESOURCE_GATHER_CMP_BLUE: -+ case HLSL_RESOURCE_GATHER_CMP_ALPHA: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), true); - - case HLSL_RESOURCE_SAMPLE_INFO: - return sm4_generate_vsir_instr_sample_info(ctx, program, load); -@@ -11039,13 +11104,14 @@ static void process_entry_function(struct hlsl_ctx *ctx, - append_output_var_copy(ctx, entry_func, entry_func->return_var); - } - -- if (profile->major_version >= 4) -+ if (hlsl_version_ge(ctx, 4, 0)) - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } - else - { - hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); -+ hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); - } - - loop_unrolling_execute(ctx, body); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index cdc0c18466f..ec7e2d036c8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -703,7 +703,56 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog - return VKD3D_OK; - } - --static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex) -+static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, -+ struct vkd3d_shader_instruction *tex, unsigned int *tmp_idx) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_location *location = &tex->location; -+ struct vkd3d_shader_instruction *div_ins, *tex_ins; -+ size_t pos = tex - instructions->elements; -+ unsigned int w_comp; -+ -+ w_comp = vsir_swizzle_get_component(tex->src[0].swizzle, 3); -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; -+ -+ div_ins = &instructions->elements[pos + 1]; -+ tex_ins = &instructions->elements[pos + 2]; -+ -+ if (!vsir_instruction_init_with_params(program, div_ins, location, VKD3DSIH_DIV, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_dst_param_init(&div_ins->dst[0], VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ div_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ div_ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ div_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; -+ -+ div_ins->src[0] = tex->src[0]; -+ -+ div_ins->src[1] = tex->src[0]; -+ div_ins->src[1].swizzle = vkd3d_shader_create_swizzle(w_comp, w_comp, w_comp, w_comp); -+ -+ if 
(!vsir_instruction_init_with_params(program, tex_ins, location, VKD3DSIH_TEX, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ tex_ins->dst[0] = tex->dst[0]; -+ -+ tex_ins->src[0].reg = div_ins->dst[0].reg; -+ tex_ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ tex_ins->src[1] = tex->src[1]; -+ -+ vkd3d_shader_instruction_make_nop(tex); -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, -+ struct vkd3d_shader_instruction *tex, struct vkd3d_shader_message_context *message_context) - { - unsigned int idx = tex->src[1].reg.idx[0].offset; - struct vkd3d_shader_src_param *srcs; -@@ -711,16 +760,34 @@ static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, st - VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); - VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr); - -- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 4))) - return VKD3D_ERROR_OUT_OF_MEMORY; - - srcs[0] = tex->src[0]; - vsir_src_param_init_resource(&srcs[1], idx, idx); - vsir_src_param_init_sampler(&srcs[2], idx, idx); - -- tex->opcode = VKD3DSIH_SAMPLE; -- tex->src = srcs; -- tex->src_count = 3; -+ if (!tex->flags) -+ { -+ tex->opcode = VKD3DSIH_SAMPLE; -+ tex->src = srcs; -+ tex->src_count = 3; -+ } -+ else if (tex->flags == VKD3DSI_TEXLD_BIAS) -+ { -+ tex->opcode = VKD3DSIH_SAMPLE_B; -+ tex->src = srcs; -+ tex->src_count = 4; -+ -+ srcs[3] = tex->src[0]; -+ srcs[3].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ } -+ else -+ { -+ vkd3d_shader_error(message_context, &tex->location, -+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Unhandled tex flags %#x.", tex->flags); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } - - return VKD3D_OK; - } -@@ -885,8 +952,16 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - break; - - case VKD3DSIH_TEX: -- if ((ret = vsir_program_lower_tex(program, ins)) < 0) -- return ret; -+ if (ins->flags == VKD3DSI_TEXLD_PROJECT) -+ { -+ if ((ret = vsir_program_lower_texldp(program, ins, &tmp_idx)) < 0) -+ return ret; -+ } -+ else -+ { -+ if ((ret = vsir_program_lower_tex(program, ins, message_context)) < 0) -+ return ret; -+ } - break; - - case VKD3DSIH_TEXLDD: -@@ -1117,6 +1192,7 @@ static void remove_unread_output_components(const struct shader_signature *signa - switch (dst->reg.type) - { - case VKD3DSPR_OUTPUT: -+ case VKD3DSPR_TEXCRDOUT: - e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); - break; - -@@ -2102,6 +2178,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - signature = normaliser->patch_constant_signature; - break; - -+ case VKD3DSPR_TEXCRDOUT: - case VKD3DSPR_COLOROUT: - reg_idx = reg->idx[0].offset; - signature = normaliser->output_signature; -@@ -2205,8 +2282,6 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par - break; - - case VKD3DSPR_TEXTURE: -- if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL) -- return; - reg->type = VKD3DSPR_INPUT; - reg_idx = reg->idx[0].offset; - signature = normaliser->input_signature; -@@ -2338,16 +2413,12 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * - { - enum vkd3d_shader_register_type type; - enum vkd3d_shader_d3dbc_constant_register set; -- uint32_t offset; - } - regs[] = - { -- {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, -- {VKD3DSPR_CONST2, 
VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, -- {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, -- {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, -- {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, -- {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER}, -+ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER}, -+ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER}, - }; - - unsigned int i; -@@ -2363,7 +2434,7 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * - } - - *set = regs[i].set; -- *index = regs[i].offset + reg->idx[0].offset; -+ *index = reg->idx[0].offset; - return true; - } - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index a7b935543a0..cfbadab8933 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -7301,7 +7301,6 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - {VKD3DSIH_DDIV, SpvOpFDiv}, - {VKD3DSIH_DIV, SpvOpFDiv}, - {VKD3DSIH_DMUL, SpvOpFMul}, -- {VKD3DSIH_DTOF, SpvOpFConvert}, - {VKD3DSIH_DTOI, SpvOpConvertFToS}, - {VKD3DSIH_DTOU, SpvOpConvertFToU}, - {VKD3DSIH_FREM, SpvOpFRem}, -@@ -7939,6 +7938,7 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, - uint32_t src_type_id, dst_type_id, condition_type_id; - enum vkd3d_shader_component_type component_type; - unsigned int component_count; -+ uint32_t write_mask; - - VKD3D_ASSERT(instruction->dst_count == 1); - VKD3D_ASSERT(instruction->src_count == 1); -@@ -7948,21 +7948,23 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, - * and for NaN to yield zero. */ - - component_count = vsir_write_mask_component_count(dst->write_mask); -- src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask); -- dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); -- src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - - if (src->reg.data_type == VKD3D_DATA_DOUBLE) - { -+ write_mask = vkd3d_write_mask_from_component_count(component_count); - int_min_id = spirv_compiler_get_constant_double_vector(compiler, -2147483648.0, component_count); - float_max_id = spirv_compiler_get_constant_double_vector(compiler, 2147483648.0, component_count); - } - else - { -+ write_mask = dst->write_mask; - int_min_id = spirv_compiler_get_constant_float_vector(compiler, -2147483648.0f, component_count); - float_max_id = spirv_compiler_get_constant_float_vector(compiler, 2147483648.0f, component_count); - } - -+ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); -+ dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); -+ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); - val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, int_min_id); - - /* VSIR allows the destination of a signed conversion to be unsigned. 
*/ -@@ -7992,6 +7994,7 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t src_type_id, dst_type_id, condition_type_id; - unsigned int component_count; -+ uint32_t write_mask; - - VKD3D_ASSERT(instruction->dst_count == 1); - VKD3D_ASSERT(instruction->src_count == 1); -@@ -8001,21 +8004,23 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, - * and for NaN to yield zero. */ - - component_count = vsir_write_mask_component_count(dst->write_mask); -- src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, dst->write_mask); -- dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); -- src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - - if (src->reg.data_type == VKD3D_DATA_DOUBLE) - { -+ write_mask = vkd3d_write_mask_from_component_count(component_count); - zero_id = spirv_compiler_get_constant_double_vector(compiler, 0.0, component_count); - float_max_id = spirv_compiler_get_constant_double_vector(compiler, 4294967296.0, component_count); - } - else - { -+ write_mask = dst->write_mask; - zero_id = spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count); - float_max_id = spirv_compiler_get_constant_float_vector(compiler, 4294967296.0f, component_count); - } - -+ src_type_id = spirv_compiler_get_type_id_for_reg(compiler, &src->reg, write_mask); -+ dst_type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); -+ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); - val_id = vkd3d_spirv_build_op_glsl_std450_max(builder, src_type_id, src_id, zero_id); - - uint_max_id = spirv_compiler_get_constant_uint_vector(compiler, UINT_MAX, component_count); -@@ -8029,6 +8034,29 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - -+static void spirv_compiler_emit_dtof(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id, src_id; -+ unsigned int component_count; -+ uint32_t write_mask; -+ -+ component_count = vsir_write_mask_component_count(dst->write_mask); -+ write_mask = vkd3d_write_mask_from_component_count(component_count); -+ -+ src_id = spirv_compiler_emit_load_src(compiler, src, write_mask); -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, component_count); -+ val_id = vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpFConvert, type_id, src_id); -+ if (instruction->flags & VKD3DSI_PRECISE_XYZW) -+ vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ - static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { -@@ -10419,7 +10447,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DDIV: - case VKD3DSIH_DIV: - case VKD3DSIH_DMUL: -- case VKD3DSIH_DTOF: - case VKD3DSIH_FREM: - case VKD3DSIH_FTOD: - case VKD3DSIH_IADD: -@@ -10507,6 +10534,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_FTOU: - spirv_compiler_emit_ftou(compiler, instruction); - break; -+ 
case VKD3DSIH_DTOF: -+ spirv_compiler_emit_dtof(compiler, instruction); -+ break; - case VKD3DSIH_DEQO: - case VKD3DSIH_DGEO: - case VKD3DSIH_DLT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 872603052ac..7f115057622 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -4758,6 +4758,8 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - case VKD3DSIH_FTOU: - case VKD3DSIH_GATHER4: - case VKD3DSIH_GATHER4_PO: -+ case VKD3DSIH_GATHER4_C: -+ case VKD3DSIH_GATHER4_PO_C: - case VKD3DSIH_GEO: - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 3bfb0a7c3cd..7e8ec156aad 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -595,28 +595,25 @@ enum vkd3d_shader_opcode - - enum vkd3d_shader_register_type - { -- VKD3DSPR_TEMP = 0, -- VKD3DSPR_INPUT = 1, -- VKD3DSPR_CONST = 2, -- VKD3DSPR_ADDR = 3, -- VKD3DSPR_TEXTURE = 3, -- VKD3DSPR_RASTOUT = 4, -- VKD3DSPR_ATTROUT = 5, -- VKD3DSPR_TEXCRDOUT = 6, -- VKD3DSPR_OUTPUT = 6, -- VKD3DSPR_CONSTINT = 7, -- VKD3DSPR_COLOROUT = 8, -- VKD3DSPR_DEPTHOUT = 9, -- VKD3DSPR_COMBINED_SAMPLER = 10, -- VKD3DSPR_CONST2 = 11, -- VKD3DSPR_CONST3 = 12, -- VKD3DSPR_CONST4 = 13, -- VKD3DSPR_CONSTBOOL = 14, -- VKD3DSPR_LOOP = 15, -- VKD3DSPR_TEMPFLOAT16 = 16, -- VKD3DSPR_MISCTYPE = 17, -- VKD3DSPR_LABEL = 18, -- VKD3DSPR_PREDICATE = 19, -+ VKD3DSPR_TEMP, -+ VKD3DSPR_INPUT, -+ VKD3DSPR_CONST, -+ VKD3DSPR_ADDR, -+ VKD3DSPR_TEXTURE, -+ VKD3DSPR_RASTOUT, -+ VKD3DSPR_ATTROUT, -+ VKD3DSPR_TEXCRDOUT, -+ VKD3DSPR_OUTPUT, -+ VKD3DSPR_CONSTINT, -+ VKD3DSPR_COLOROUT, -+ VKD3DSPR_DEPTHOUT, -+ VKD3DSPR_COMBINED_SAMPLER, -+ VKD3DSPR_CONSTBOOL, -+ VKD3DSPR_LOOP, -+ VKD3DSPR_TEMPFLOAT16, -+ VKD3DSPR_MISCTYPE, -+ VKD3DSPR_LABEL, -+ VKD3DSPR_PREDICATE, - VKD3DSPR_IMMCONST, - VKD3DSPR_IMMCONST64, - VKD3DSPR_CONSTBUFFER, --- -2.45.2 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch new file mode 100644 index 00000000..0cdbc9c1 --- /dev/null +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-5b2d62e59a6365e32aac3fa37fe16ab3582.patch @@ -0,0 +1,1119 @@ +From 1b071c5cb4a8559f992c0a9cb46b784abe640cb5 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 22 Jan 2025 07:08:19 +1100 +Subject: [PATCH] Updated vkd3d to 5b2d62e59a6365e32aac3fa37fe16ab3582deae4. 
+ +--- + libs/vkd3d/include/vkd3d.h | 29 ++- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 4 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 90 +++++++++ + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 36 ++++ + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 194 ++++++++++++++++++++ + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 131 +++++++++++++ + libs/vkd3d/libs/vkd3d-shader/spirv.c | 3 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 19 ++ + libs/vkd3d/libs/vkd3d/command.c | 57 +++++- + libs/vkd3d/libs/vkd3d/device.c | 34 ++-- + libs/vkd3d/libs/vkd3d/resource.c | 9 +- + libs/vkd3d/libs/vkd3d/utils.c | 2 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 7 +- + 13 files changed, 581 insertions(+), 34 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index b18fd14f4c3..2376693421c 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -411,9 +411,13 @@ VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); + * the Vulkan driver as being submitted before other work submitted + * though the Direct3D 12 API. If this is not desired, it is + * recommended to synchronize work submission using an ID3D12Fence +- * object, by submitting to the queue a signal operation after all the +- * Direct3D 12 work is submitted and waiting for it before calling +- * vkd3d_acquire_vk_queue(). ++ * object: ++ * 1. submit work through the Direct3D 12 API; ++ * 2. call vkd3d_queue_signal_on_cpu(); ++ * 3. wait for the fence to be signalled; ++ * 4. call vkd3d_acquire_vk_queue(); it is guaranteed that all work submitted ++ * at point 1 has already been submitted to Vulkan (though not necessarily ++ * executed). + * + * \since 1.0 + */ +@@ -466,6 +470,21 @@ VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void + */ + VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback); + ++/** ++ * Signal a fence on the CPU once all the currently outstanding queue work is ++ * submitted to Vulkan. ++ * ++ * The fence will be signalled on the CPU (as if ID3D12Fence_Signal() was ++ * called) once all the work submitted through the Direct3D 12 API before ++ * vkd3d_queue_signal_on_cpu() is called has left the internal queue and has ++ * been submitted to the underlying Vulkan queue. Read the documentation for ++ * vkd3d_acquire_vk_queue() for more details. ++ * ++ * \since 1.15 ++ */ ++VKD3D_API HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *queue, ++ ID3D12Fence *fence, uint64_t value); ++ + #endif /* VKD3D_NO_PROTOTYPES */ + + /* +@@ -512,6 +531,10 @@ typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const + /** Type of vkd3d_set_log_callback(). \since 1.4 */ + typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback); + ++/** Type of vkd3d_queue_signal_on_cpu(). 
\since 1.15 */ ++typedef HRESULT (*PFN_vkd3d_queue_signal_on_cpu)(ID3D12CommandQueue *queue, ++ ID3D12Fence *fence, uint64_t value); ++ + #ifdef __cplusplus + } + #endif /* __cplusplus */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 69e14e0c7bf..0639da83aa6 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -1180,8 +1180,8 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + bool is_sm_5_1 = vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1); + + if (reg->idx[0].rel_addr || reg->type == VKD3DSPR_IMMCONSTBUFFER +- || reg->type == VKD3DSPR_INCONTROLPOINT || (reg->type == VKD3DSPR_INPUT +- && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY ++ || reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT ++ || (reg->type == VKD3DSPR_INPUT && (compiler->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY + || compiler->shader_version.type == VKD3D_SHADER_TYPE_HULL))) + { + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 23f54d3edec..48d9d4e0023 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -2031,6 +2031,25 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch + return &constant->node; + } + ++struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, ++ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, ++ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_interlocked *interlocked; ++ ++ if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked)))) ++ return NULL; ++ ++ init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc); ++ interlocked->op = op; ++ hlsl_copy_deref(ctx, &interlocked->dst, dst); ++ hlsl_src_from_node(&interlocked->coords, coords); ++ hlsl_src_from_node(&interlocked->cmp_value, cmp_value); ++ hlsl_src_from_node(&interlocked->value, value); ++ ++ return &interlocked->node; ++} ++ + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) + { + struct hlsl_type *type = index->val.node->data_type; +@@ -2375,6 +2394,27 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr + return dst; + } + ++static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx, ++ struct clone_instr_map *map, struct hlsl_ir_interlocked *src) ++{ ++ struct hlsl_ir_interlocked *dst; ++ ++ if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) ++ return NULL; ++ init_node(&dst->node, HLSL_IR_INTERLOCKED, NULL, &src->node.loc); ++ dst->op = src->op; ++ ++ if (!clone_deref(ctx, map, &dst->dst, &src->dst)) ++ { ++ vkd3d_free(dst); ++ return NULL; ++ } ++ clone_src(map, &dst->coords, &src->coords); ++ clone_src(map, &dst->cmp_value, &src->cmp_value); ++ clone_src(map, &dst->value, &src->value); ++ return &dst->node; ++} ++ + static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_compile *compile) + { +@@ -2575,6 +2615,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + case HLSL_IR_SWIZZLE: + return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + ++ case HLSL_IR_INTERLOCKED: ++ return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr)); ++ + case HLSL_IR_COMPILE: + return clone_compile(ctx, map, 
hlsl_ir_compile(instr)); + +@@ -3013,6 +3056,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + [HLSL_IR_STORE ] = "HLSL_IR_STORE", + [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", + [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", ++ [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED", + + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", + [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", +@@ -3458,6 +3502,35 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ + vkd3d_string_buffer_printf(buffer, "]"); + } + ++static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked) ++{ ++ static const char *const op_names[] = ++ { ++ [HLSL_INTERLOCKED_ADD] = "add", ++ [HLSL_INTERLOCKED_AND] = "and", ++ [HLSL_INTERLOCKED_CMP_EXCH] = "cmp_exch", ++ [HLSL_INTERLOCKED_EXCH] = "exch", ++ [HLSL_INTERLOCKED_MAX] = "max", ++ [HLSL_INTERLOCKED_MIN] = "min", ++ [HLSL_INTERLOCKED_OR] = "or", ++ [HLSL_INTERLOCKED_XOR] = "xor", ++ }; ++ ++ VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names)); ++ vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]); ++ dump_deref(buffer, &interlocked->dst); ++ vkd3d_string_buffer_printf(buffer, ", coords = "); ++ dump_src(buffer, &interlocked->coords); ++ if (interlocked->cmp_value.node) ++ { ++ vkd3d_string_buffer_printf(buffer, ", cmp_value = "); ++ dump_src(buffer, &interlocked->cmp_value); ++ } ++ vkd3d_string_buffer_printf(buffer, ", value = "); ++ dump_src(buffer, &interlocked->value); ++ vkd3d_string_buffer_printf(buffer, ")"); ++} ++ + static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_compile *compile) + { +@@ -3591,6 +3664,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); + break; + ++ case HLSL_IR_INTERLOCKED: ++ dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr)); ++ break; ++ + case HLSL_IR_COMPILE: + dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); + break; +@@ -3819,6 +3896,15 @@ static void free_ir_index(struct hlsl_ir_index *index) + vkd3d_free(index); + } + ++static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked) ++{ ++ hlsl_cleanup_deref(&interlocked->dst); ++ hlsl_src_remove(&interlocked->coords); ++ hlsl_src_remove(&interlocked->cmp_value); ++ hlsl_src_remove(&interlocked->value); ++ vkd3d_free(interlocked); ++} ++ + static void free_ir_compile(struct hlsl_ir_compile *compile) + { + unsigned int i; +@@ -3905,6 +3991,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + free_ir_switch(hlsl_ir_switch(node)); + break; + ++ case HLSL_IR_INTERLOCKED: ++ free_ir_interlocked(hlsl_ir_interlocked(node)); ++ break; ++ + case HLSL_IR_COMPILE: + free_ir_compile(hlsl_ir_compile(node)); + break; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 4d78dbebb34..e9845f8f887 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -319,6 +319,7 @@ enum hlsl_ir_node_type + HLSL_IR_STORE, + HLSL_IR_SWIZZLE, + HLSL_IR_SWITCH, ++ HLSL_IR_INTERLOCKED, + + HLSL_IR_COMPILE, + HLSL_IR_SAMPLER_STATE, +@@ -950,6 +951,32 @@ struct hlsl_ir_stateblock_constant + char *name; + }; + ++enum hlsl_interlocked_op ++{ ++ HLSL_INTERLOCKED_ADD, ++ HLSL_INTERLOCKED_AND, ++ HLSL_INTERLOCKED_CMP_EXCH, ++ HLSL_INTERLOCKED_EXCH, ++ HLSL_INTERLOCKED_MAX, ++ HLSL_INTERLOCKED_MIN, ++ HLSL_INTERLOCKED_OR, ++ HLSL_INTERLOCKED_XOR, ++}; ++ ++/* 
Represents an interlocked operation. ++ * ++ * The data_type of the node indicates whether or not the original value is returned. ++ * If the original value is not returned, the data_type is set to NULL. ++ * Otherwise, the data_type is set to the type of the original value. ++ */ ++struct hlsl_ir_interlocked ++{ ++ struct hlsl_ir_node node; ++ enum hlsl_interlocked_op op; ++ struct hlsl_deref dst; ++ struct hlsl_src coords, cmp_value, value; ++}; ++ + struct hlsl_scope + { + /* Item entry for hlsl_ctx.scopes. */ +@@ -1247,6 +1274,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n + return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); + } + ++static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node) ++{ ++ VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED); ++ return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node); ++} ++ + static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) + { + VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); +@@ -1554,6 +1587,9 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty + struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type, ++ const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value, ++ struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index e5a03067d16..da2f482b148 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -667,6 +667,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: ++ case HLSL_IR_INTERLOCKED: + case HLSL_IR_STATEBLOCK_CONSTANT: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); +@@ -1322,6 +1323,11 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name); + ++ if ((param->modifiers & HLSL_STORAGE_OUT) && !(param->modifiers & HLSL_STORAGE_IN) ++ && (param->type->modifiers & HLSL_MODIFIER_CONST)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Parameter '%s' is declared as both \"out\" and \"const\".", param->name); ++ + if (param->reg_reservation.offset_type) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on function parameters."); +@@ -5374,6 +5380,185 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, + return true; + } + ++static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name) ++{ ++ struct hlsl_ir_node *lhs, *coords, *val, *cmp_val = NULL, 
*orig_val = NULL; ++ struct hlsl_ir_node *interlocked, *void_ret; ++ struct hlsl_type *lhs_type, *val_type; ++ struct vkd3d_string_buffer *string; ++ struct hlsl_deref dst_deref; ++ ++ if (hlsl_version_lt(ctx, 5, 0)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "Interlocked functions can only be used in shader model 5.0 or higher."); ++ ++ if (op != HLSL_INTERLOCKED_CMP_EXCH && op != HLSL_INTERLOCKED_EXCH ++ && params->args_count != 2 && params->args_count != 3) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.", ++ name, params->args_count); ++ return false; ++ } ++ ++ lhs = params->args[0]; ++ lhs_type = lhs->data_type; ++ ++ if (op == HLSL_INTERLOCKED_CMP_EXCH) ++ { ++ cmp_val = params->args[1]; ++ val = params->args[2]; ++ if (params->args_count == 4) ++ orig_val = params->args[3]; ++ } ++ else ++ { ++ val = params->args[1]; ++ if (params->args_count == 3) ++ orig_val = params->args[2]; ++ } ++ ++ if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT ++ && lhs_type->e.numeric.type != HLSL_TYPE_INT)) ++ { ++ if ((string = hlsl_type_to_string(ctx, lhs_type))) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.", ++ name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ return false; ++ } ++ ++ /* Interlocked*() functions always take uint for the value parameters, ++ * except for InterlockedMax()/InterlockedMin(). */ ++ if (op == HLSL_INTERLOCKED_MAX || op == HLSL_INTERLOCKED_MIN) ++ { ++ enum hlsl_base_type val_base_type = val->data_type->e.numeric.type; ++ ++ /* Floating values are always cast to signed integers. */ ++ if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE) ++ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); ++ else ++ val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type); ++ } ++ else ++ { ++ val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); ++ } ++ ++ if (cmp_val && !(cmp_val = add_implicit_conversion(ctx, params->instrs, cmp_val, val_type, loc))) ++ return false; ++ if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc))) ++ return false; ++ ++ /* TODO: groupshared variables */ ++ if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs))) ++ { ++ if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) ++ { ++ hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets."); ++ return false; ++ } ++ ++ if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node)) ++ return false; ++ coords = hlsl_ir_index(lhs)->idx.node; ++ ++ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ ++ if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); ++ return false; ++ } ++ } ++ else ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); ++ return false; ++ } ++ ++ interlocked = hlsl_new_interlocked(ctx, op, orig_val ? 
lhs_type : NULL, &dst_deref, coords, cmp_val, val, loc); ++ hlsl_cleanup_deref(&dst_deref); ++ if (!interlocked) ++ return false; ++ hlsl_block_add_instr(params->instrs, interlocked); ++ ++ if (orig_val) ++ { ++ if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST) ++ hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, ++ "Output argument to '%s' is const.", name); ++ ++ if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked)) ++ return false; ++ } ++ ++ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, void_ret); ++ ++ return true; ++} ++ ++static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, "InterlockedAdd"); ++} ++ ++static bool intrinsic_InterlockedAnd(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_AND, params, loc, "InterlockedAnd"); ++} ++ ++static bool intrinsic_InterlockedCompareExchange(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareExchange"); ++} ++ ++static bool intrinsic_InterlockedCompareStore(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_CMP_EXCH, params, loc, "InterlockedCompareStore"); ++} ++ ++static bool intrinsic_InterlockedExchange(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_EXCH, params, loc, "InterlockedExchange"); ++} ++ ++static bool intrinsic_InterlockedMax(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MAX, params, loc, "InterlockedMax"); ++} ++ ++static bool intrinsic_InterlockedMin(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MIN, params, loc, "InterlockedMin"); ++} ++ ++static bool intrinsic_InterlockedOr(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_OR, params, loc, "InterlockedOr"); ++} ++ ++static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); ++} ++ + static const struct intrinsic_function + { + const char *name; +@@ -5387,6 +5572,15 @@ intrinsic_functions[] = + /* Note: these entries should be kept in alphabetical order. 
*/ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, ++ {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd}, ++ {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd}, ++ {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange}, ++ {"InterlockedCompareStore", 3, true, intrinsic_InterlockedCompareStore}, ++ {"InterlockedExchange", 3, true, intrinsic_InterlockedExchange}, ++ {"InterlockedMax", -1, true, intrinsic_InterlockedMax}, ++ {"InterlockedMin", -1, true, intrinsic_InterlockedMin}, ++ {"InterlockedOr", -1, true, intrinsic_InterlockedOr}, ++ {"InterlockedXor", -1, true, intrinsic_InterlockedXor}, + {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, + {"all", 1, true, intrinsic_all}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index cef6a87c8b6..8d817b051ce 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -739,6 +739,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); + return res; + ++ case HLSL_IR_INTERLOCKED: ++ res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr); ++ return res; ++ + default: + return false; + } +@@ -1836,6 +1840,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, + return progress; + } + ++static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx, ++ struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state) ++{ ++ bool progress = false; ++ ++ progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index); ++ return progress; ++} ++ + static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct copy_propagation_state *state) + { +@@ -2042,6 +2055,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b + progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state); + break; + ++ case HLSL_IR_INTERLOCKED: ++ progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state); ++ + default: + break; + } +@@ -2225,6 +2241,24 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + validate_component_index_range_from_deref(ctx, &store->lhs); + break; + } ++ case HLSL_IR_INTERLOCKED: ++ { ++ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); ++ ++ if (!interlocked->dst.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Accessed resource must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Accessed resource from \"%s\" must be determinable at compile time.", ++ interlocked->dst.var->name); ++ note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource"); ++ } ++ break; ++ } + default: + break; + } +@@ -4478,6 +4512,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_LOOP: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: ++ case HLSL_IR_INTERLOCKED: + break; + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in 
the shader program. */ +@@ -4724,6 +4759,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + index->idx.node->last_read = last_read; + break; + } ++ case HLSL_IR_INTERLOCKED: ++ { ++ struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); ++ ++ var = interlocked->dst.var; ++ var->last_read = max(var->last_read, last_read); ++ deref_mark_last_read(&interlocked->dst, last_read); ++ interlocked->coords.node->last_read = last_read; ++ interlocked->value.node->last_read = last_read; ++ if (interlocked->cmp_value.node) ++ interlocked->cmp_value.node->last_read = last_read; ++ break; ++ } + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); +@@ -5135,6 +5183,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); + break; + ++ case HLSL_IR_INTERLOCKED: ++ register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst); ++ break; ++ + default: + break; + } +@@ -9942,6 +9994,81 @@ static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + } + } + ++static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_interlocked *interlocked) ++{ ++ ++ static const enum vkd3d_shader_opcode opcodes[] = ++ { ++ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD, ++ [HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND, ++ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE, ++ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX, ++ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_ATOMIC_UMIN, ++ [HLSL_INTERLOCKED_OR] = VKD3DSIH_ATOMIC_OR, ++ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_ATOMIC_XOR, ++ }; ++ ++ static const enum vkd3d_shader_opcode imm_opcodes[] = ++ { ++ [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD, ++ [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND, ++ [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH, ++ [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH, ++ [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX, ++ [HLSL_INTERLOCKED_MIN] = VKD3DSIH_IMM_ATOMIC_UMIN, ++ [HLSL_INTERLOCKED_OR] = VKD3DSIH_IMM_ATOMIC_OR, ++ [HLSL_INTERLOCKED_XOR] = VKD3DSIH_IMM_ATOMIC_XOR, ++ }; ++ ++ struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node; ++ struct hlsl_ir_node *coords = interlocked->coords.node; ++ struct hlsl_ir_node *instr = &interlocked->node; ++ bool is_imm = interlocked->node.reg.allocated; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ ++ opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op]; ++ ++ if (value->data_type->e.numeric.type == HLSL_TYPE_INT) ++ { ++ if (opcode == VKD3DSIH_ATOMIC_UMAX) ++ opcode = VKD3DSIH_ATOMIC_IMAX; ++ else if (opcode == VKD3DSIH_ATOMIC_UMIN) ++ opcode = VKD3DSIH_ATOMIC_IMIN; ++ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX) ++ opcode = VKD3DSIH_IMM_ATOMIC_IMAX; ++ else if (opcode == VKD3DSIH_IMM_ATOMIC_UMIN) ++ opcode = VKD3DSIH_IMM_ATOMIC_IMIN; ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, ++ is_imm ? 2 : 1, cmp_value ? 3 : 2))) ++ return false; ++ ++ if (is_imm) ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ dst_param = is_imm ? 
&ins->dst[1] : &ins->dst[0]; ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0)) ++ return false; ++ dst_param->reg.dimension = VSIR_DIMENSION_NONE; ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ if (cmp_value) ++ { ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, cmp_value, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[2], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ } ++ else ++ { ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ } ++ ++ return true; ++} ++ + static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_jump *jump) + { +@@ -10119,6 +10246,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + ++ case HLSL_IR_INTERLOCKED: ++ sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr)); ++ break; ++ + default: + break; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index efa76983546..b1caf61d512 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -10923,7 +10923,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compiler->input_control_point_count = program->input_control_point_count; + compiler->output_control_point_count = program->output_control_point_count; + +- if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ++ || (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler))) + spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index aa666086710..1ecfe32de45 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -4026,6 +4026,15 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + break; + + case VKD3DSIH_ADD: ++ case VKD3DSIH_ATOMIC_AND: ++ case VKD3DSIH_ATOMIC_CMP_STORE: ++ case VKD3DSIH_ATOMIC_IADD: ++ case VKD3DSIH_ATOMIC_IMAX: ++ case VKD3DSIH_ATOMIC_IMIN: ++ case VKD3DSIH_ATOMIC_UMAX: ++ case VKD3DSIH_ATOMIC_UMIN: ++ case VKD3DSIH_ATOMIC_OR: ++ case VKD3DSIH_ATOMIC_XOR: + case VKD3DSIH_AND: + case VKD3DSIH_BREAK: + case VKD3DSIH_CASE: +@@ -4068,6 +4077,16 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_IMAD: + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: ++ case VKD3DSIH_IMM_ATOMIC_AND: ++ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_IADD: ++ case VKD3DSIH_IMM_ATOMIC_IMAX: ++ case VKD3DSIH_IMM_ATOMIC_IMIN: ++ case VKD3DSIH_IMM_ATOMIC_UMAX: ++ case VKD3DSIH_IMM_ATOMIC_UMIN: ++ case VKD3DSIH_IMM_ATOMIC_OR: ++ case VKD3DSIH_IMM_ATOMIC_XOR: + case VKD3DSIH_IMUL: + case VKD3DSIH_INE: + case VKD3DSIH_INEG: +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 6c7bf167910..ce0c3b9128f 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -327,9 +327,12 @@ static void *vkd3d_fence_worker_main(void *arg) + struct vkd3d_waiting_fence *old_fences, *cur_fences = NULL; + 
struct vkd3d_fence_worker *worker = arg; + unsigned int i; ++ bool timeline; + + vkd3d_set_thread_name("vkd3d_fence"); + ++ timeline = worker->device->vk_info.KHR_timeline_semaphore; ++ + for (;;) + { + vkd3d_mutex_lock(&worker->mutex); +@@ -357,7 +360,12 @@ static void *vkd3d_fence_worker_main(void *arg) + vkd3d_mutex_unlock(&worker->mutex); + + for (i = 0; i < cur_fence_count; ++i) +- worker->wait_for_gpu_fence(worker, &cur_fences[i]); ++ { ++ if (timeline) ++ vkd3d_wait_for_gpu_timeline_semaphore(worker, &cur_fences[i]); ++ else ++ vkd3d_wait_for_gpu_fence(worker, &cur_fences[i]); ++ } + } + + vkd3d_free(cur_fences); +@@ -379,9 +387,6 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, + worker->fences = NULL; + worker->fences_size = 0; + +- worker->wait_for_gpu_fence = device->vk_info.KHR_timeline_semaphore +- ? vkd3d_wait_for_gpu_timeline_semaphore : vkd3d_wait_for_gpu_fence; +- + vkd3d_mutex_init(&worker->mutex); + + vkd3d_cond_init(&worker->cond); +@@ -399,6 +404,7 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, + static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, + struct d3d12_device *device) + { ++ unsigned int i; + HRESULT hr; + + TRACE("worker %p.\n", worker); +@@ -416,6 +422,9 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, + vkd3d_mutex_destroy(&worker->mutex); + vkd3d_cond_destroy(&worker->cond); + ++ for (i = 0; i < worker->fence_count; ++i) ++ d3d12_fence_decref(worker->fences[i].fence); ++ + vkd3d_free(worker->fences); + + return S_OK; +@@ -556,7 +565,8 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence) + fence->old_vk_fences[i] = VK_NULL_HANDLE; + } + +- d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); ++ if (!device->vk_info.KHR_timeline_semaphore) ++ d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true); + VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL)); + + vkd3d_mutex_unlock(&fence->mutex); +@@ -6450,6 +6460,7 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) + break; + + case VKD3D_CS_OP_SIGNAL: ++ case VKD3D_CS_OP_SIGNAL_ON_CPU: + d3d12_fence_decref(op->u.signal.fence); + break; + +@@ -7440,6 +7451,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + struct vkd3d_cs_op_data *op; + struct d3d12_fence *fence; + unsigned int i; ++ HRESULT hr; + + queue->is_flushing = true; + +@@ -7473,6 +7485,11 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); + break; + ++ case VKD3D_CS_OP_SIGNAL_ON_CPU: ++ if (FAILED(hr = d3d12_fence_Signal(&op->u.signal.fence->ID3D12Fence1_iface, op->u.signal.value))) ++ ERR("Failed to signal fence %p, hr %s.\n", op->u.signal.fence, debugstr_hresult(hr)); ++ break; ++ + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break; +@@ -7615,6 +7632,36 @@ void vkd3d_release_vk_queue(ID3D12CommandQueue *queue) + return vkd3d_queue_release(d3d12_queue->vkd3d_queue); + } + ++HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, uint64_t value) ++{ ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface); ++ struct vkd3d_cs_op_data *op; ++ HRESULT hr = S_OK; ++ ++ TRACE("iface %p, fence %p, value %#"PRIx64".\n", 
iface, fence_iface, value); ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ hr = E_OUTOFMEMORY; ++ goto done; ++ } ++ op->opcode = VKD3D_CS_OP_SIGNAL_ON_CPU; ++ op->u.signal.fence = fence; ++ op->u.signal.value = value; ++ ++ d3d12_fence_incref(fence); ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++done: ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++ return hr; ++} ++ + /* ID3D12CommandSignature */ + static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface) + { +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 9aa4adb6c06..b51e2963efa 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -3557,12 +3557,6 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + return E_INVALIDARG; + } + +- if (data->Format == DXGI_FORMAT_UNKNOWN) +- { +- data->PlaneCount = 1; +- return S_OK; +- } +- + if (!(format = vkd3d_get_format(device, data->Format, false))) + format = vkd3d_get_format(device, data->Format, true); + if (!format) +@@ -4368,7 +4362,7 @@ static void d3d12_device_get_resource1_allocation_info(struct d3d12_device *devi + { + desc = &resource_descs[i]; + +- if (FAILED(d3d12_resource_validate_desc(desc, device))) ++ if (FAILED(d3d12_resource_validate_desc(desc, device, 0))) + { + WARN("Invalid resource desc.\n"); + goto invalid; +@@ -4699,10 +4693,11 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + uint64_t base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, + UINT64 *row_sizes, UINT64 *total_bytes) + { +- unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; ++ unsigned int i, sub_resource_idx, plane_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; + const struct vkd3d_format *format; + uint64_t offset, size, total; ++ DXGI_FORMAT plane_format; + + if (layouts) + memset(layouts, 0xff, sizeof(*layouts) * sub_resource_count); +@@ -4713,20 +4708,19 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + if (total_bytes) + *total_bytes = ~(uint64_t)0; + +- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) ++ if (!(format = vkd3d_get_format(device, desc->Format, true))) + { + WARN("Invalid format %#x.\n", desc->Format); + return; + } + +- if (FAILED(d3d12_resource_validate_desc(desc, device))) ++ if (FAILED(d3d12_resource_validate_desc(desc, device, VKD3D_VALIDATE_FORCE_ALLOW_DS))) + { + WARN("Invalid resource desc.\n"); + return; + } + +- plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) +- && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 
2 : 1; ++ plane_count = format->plane_count; + sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc); + + if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count)) +@@ -4737,21 +4731,31 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + + offset = 0; + total = 0; ++ plane_format = desc->Format; + for (i = 0; i < sub_resource_count; ++i) + { + sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane; ++ plane_idx = (first_sub_resource + i) / sub_resources_per_plane; + miplevel_idx = sub_resource_idx % desc->MipLevels; ++ ++ if (plane_count > 1) ++ { ++ plane_format = !plane_idx ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R8_TYPELESS; ++ format = vkd3d_get_format(device, plane_format, true); ++ } ++ + width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width); + height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height); + depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); + row_count = height / format->block_height; + row_size = (width / format->block_width) * format->byte_count * format->block_byte_count; +- row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); ++ /* Direct3D 12 requires double the alignment for dual planes. */ ++ row_pitch = align(row_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count); + + if (layouts) + { + layouts[i].Offset = base_offset + offset; +- layouts[i].Footprint.Format = desc->Format; ++ layouts[i].Footprint.Format = plane_format; + layouts[i].Footprint.Width = width; + layouts[i].Footprint.Height = height; + layouts[i].Footprint.Depth = depth; +@@ -4763,7 +4767,7 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + row_sizes[i] = row_size; + + size = max(0, row_count - 1) * row_pitch + row_size; +- size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + size; ++ size = max(0, depth - 1) * align(size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * plane_count) + size; + + total = offset + size; + offset = align(total, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index 1f7d90eb95f..eab97715944 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -951,7 +951,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + HRESULT hr; + + VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); +- VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); ++ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device, 0) == S_OK); + + if (!desc->MipLevels) + { +@@ -1847,7 +1847,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 + return true; + } + +-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) ++HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags) + { + const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; + const struct vkd3d_format *format; +@@ -1893,7 +1893,8 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 + return E_INVALIDARG; + } + +- if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) ++ if (!(format = vkd3d_get_format(device, desc->Format, ++ desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL || flags & VKD3D_VALIDATE_FORCE_ALLOW_DS))) + { + WARN("Invalid format 
%#x.\n", desc->Format); + return E_INVALIDARG; +@@ -2013,7 +2014,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + resource->gpu_address = 0; + resource->flags = 0; + +- if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device))) ++ if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc, device, 0))) + return hr; + + resource->format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0); +diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c +index 839bb173854..c2832a61f67 100644 +--- a/libs/vkd3d/libs/vkd3d/utils.c ++++ b/libs/vkd3d/libs/vkd3d/utils.c +@@ -29,7 +29,7 @@ + #define UINT VKD3D_FORMAT_TYPE_UINT + static const struct vkd3d_format vkd3d_formats[] = + { +- {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1}, ++ {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1, 0, 1}, + {DXGI_FORMAT_R32G32B32A32_TYPELESS, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1, TYPELESS}, + {DXGI_FORMAT_R32G32B32A32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, 16, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32B32A32_UINT, 16, 1, 1, 1, COLOR, 1, UINT}, +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 8488d5db3fa..fd1fbb1679a 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -70,6 +70,8 @@ + + #define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) + ++#define VKD3D_VALIDATE_FORCE_ALLOW_DS 0x1u ++ + extern uint64_t object_global_serial_id; + + struct d3d12_command_list; +@@ -240,8 +242,6 @@ struct vkd3d_fence_worker + struct vkd3d_waiting_fence *fences; + size_t fences_size; + +- void (*wait_for_gpu_fence)(struct vkd3d_fence_worker *worker, const struct vkd3d_waiting_fence *enqueued_fence); +- + struct vkd3d_queue *queue; + struct d3d12_device *device; + }; +@@ -534,7 +534,7 @@ struct vkd3d_resource_allocation_info + }; + + bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); +-HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device); ++HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device, uint32_t flags); + void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, + UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, +@@ -1362,6 +1362,7 @@ enum vkd3d_cs_op + { + VKD3D_CS_OP_WAIT, + VKD3D_CS_OP_SIGNAL, ++ VKD3D_CS_OP_SIGNAL_ON_CPU, + VKD3D_CS_OP_EXECUTE, + VKD3D_CS_OP_UPDATE_MAPPINGS, + VKD3D_CS_OP_COPY_MAPPINGS, +-- +2.45.2 + diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-b60995b106724581ed33d3ea327e7dd662f.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-b60995b106724581ed33d3ea327e7dd662f.patch deleted file mode 100644 index 8ba53b92..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-b60995b106724581ed33d3ea327e7dd662f.patch +++ /dev/null @@ -1,527 +0,0 @@ -From b4d1573a8e604378ac437fb837e4cef70e54b32f Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sat, 21 Dec 2024 12:28:11 +1100 -Subject: [PATCH] Updated vkd3d to b60995b106724581ed33d3ea327e7dd662f1f4d9. 
- ---- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 - - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 204 +++++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 37 +++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 199 ------------------- - 4 files changed, 232 insertions(+), 211 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 2acc003c9a1..b0e2b54c348 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -1688,9 +1688,6 @@ struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned - void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count); - void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef); - --enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, -- unsigned int storage_modifiers); -- - struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 08f139f5e8f..4ccbed78f38 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -5489,7 +5489,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun - return allocator.reg_count; - } - --enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) -+static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, -+ unsigned int storage_modifiers) - { - unsigned int i; - -@@ -10083,6 +10084,207 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); - } - -+static int sm4_compare_extern_resources(const void *a, const void *b) -+{ -+ const struct extern_resource *aa = a; -+ const struct extern_resource *bb = b; -+ int r; -+ -+ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) -+ return r; -+ -+ if ((r = vkd3d_u32_compare(aa->space, bb->space))) -+ return r; -+ -+ return vkd3d_u32_compare(aa->index, bb->index); -+} -+ -+static const char *string_skip_tag(const char *string) -+{ -+ if (!strncmp(string, "", strlen(""))) -+ return string + strlen(""); -+ return string; -+} -+ -+void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < count; ++i) -+ { -+ vkd3d_free(extern_resources[i].name); -+ } -+ vkd3d_free(extern_resources); -+} -+ -+struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+{ -+ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; -+ struct extern_resource *extern_resources = NULL; -+ const struct hlsl_ir_var *var; -+ struct hlsl_buffer *buffer; -+ enum hlsl_regset regset; -+ size_t capacity = 0; -+ char *name; -+ -+ *count = 0; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (separate_components) -+ { -+ unsigned int component_count = hlsl_type_component_count(var->data_type); -+ unsigned int k, regset_offset; -+ -+ for (k = 0; k < component_count; ++k) -+ { -+ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ struct vkd3d_string_buffer 
*name_buffer; -+ -+ if (!hlsl_type_is_resource(component_type)) -+ continue; -+ -+ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset_offset > var->regs[regset].allocation_size) -+ continue; -+ -+ if (!var->objects_usage[regset][regset_offset].used) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, -+ &capacity, *count + 1, sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ hlsl_release_string_buffer(ctx, name_buffer); -+ return NULL; -+ } -+ hlsl_release_string_buffer(ctx, name_buffer); -+ -+ extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = NULL; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ -+ extern_resources[*count].component_type = component_type; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].space = var->regs[regset].space; -+ extern_resources[*count].index = var->regs[regset].index + regset_offset; -+ extern_resources[*count].bind_count = 1; -+ extern_resources[*count].loc = var->loc; -+ -+ ++*count; -+ } -+ } -+ else -+ { -+ unsigned int r; -+ -+ if (!hlsl_type_is_resource(var->data_type)) -+ continue; -+ -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) -+ { -+ if (!var->regs[r].allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, -+ &capacity, *count + 1, sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = var; -+ extern_resources[*count].buffer = NULL; -+ -+ extern_resources[*count].name = name; -+ /* For some reason 5.1 resources aren't marked as -+ * user-packed, but cbuffers still are. 
*/ -+ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) -+ && !!var->reg_reservation.reg_type; -+ -+ extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); -+ -+ extern_resources[*count].regset = r; -+ extern_resources[*count].id = var->regs[r].id; -+ extern_resources[*count].space = var->regs[r].space; -+ extern_resources[*count].index = var->regs[r].index; -+ extern_resources[*count].bind_count = var->bind_count[r]; -+ extern_resources[*count].loc = var->loc; -+ -+ ++*count; -+ } -+ } -+ } -+ -+ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!buffer->reg.allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, -+ &capacity, *count + 1, sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, buffer->name))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = buffer; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; -+ -+ extern_resources[*count].component_type = NULL; -+ -+ extern_resources[*count].regset = HLSL_REGSET_NUMERIC; -+ extern_resources[*count].id = buffer->reg.id; -+ extern_resources[*count].space = buffer->reg.space; -+ extern_resources[*count].index = buffer->reg.index; -+ extern_resources[*count].bind_count = 1; -+ extern_resources[*count].loc = buffer->loc; -+ -+ ++*count; -+ } -+ -+ qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -+ -+ return extern_resources; -+} -+ - static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program) - { - struct extern_resource *extern_resources; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index ec7e2d036c8..c2e4b5a4947 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -8141,6 +8141,16 @@ static void vsir_validate_dst_param(struct validation_context *ctx, - } - } - -+static void vsir_validate_io_src_param(struct validation_context *ctx, -+ const struct vkd3d_shader_src_param *src) -+{ -+ struct vsir_io_register_data io_reg_data; -+ -+ if (!vsir_get_io_register_data(ctx, src->reg.type, &io_reg_data) || !(io_reg_data.flags & INPUT_BIT)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x used as source parameter.", src->reg.type); -+} -+ - static void vsir_validate_src_param(struct validation_context *ctx, - const struct vkd3d_shader_src_param *src) - { -@@ -8176,18 +8186,24 @@ static void vsir_validate_src_param(struct validation_context *ctx, - "Invalid NULL register used as source parameter."); - break; - -+ case VKD3DSPR_INPUT: -+ vsir_validate_io_src_param(ctx, src); -+ break; -+ - case VKD3DSPR_OUTPUT: -- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL -- || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid OUTPUT register used as source parameter."); -+ vsir_validate_io_src_param(ctx, src); -+ break; -+ -+ case VKD3DSPR_INCONTROLPOINT: -+ vsir_validate_io_src_param(ctx, src); -+ break; -+ -+ case VKD3DSPR_OUTCONTROLPOINT: -+ vsir_validate_io_src_param(ctx, src); - break; - - case 
VKD3DSPR_PATCHCONST: -- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN -- && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders."); -+ vsir_validate_io_src_param(ctx, src); - break; - - default: -@@ -8293,6 +8309,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); - -+ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6 && element->register_count != 1) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid register count %u.", idx, signature_type_name, -+ element->register_count); -+ - if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT - || MAX_REG_OUTPUT - element->register_index < element->register_count)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 7f115057622..bdc1c738a32 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -3483,205 +3483,6 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - } - } - --static int sm4_compare_extern_resources(const void *a, const void *b) --{ -- const struct extern_resource *aa = (const struct extern_resource *)a; -- const struct extern_resource *bb = (const struct extern_resource *)b; -- int r; -- -- if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) -- return r; -- -- if ((r = vkd3d_u32_compare(aa->space, bb->space))) -- return r; -- -- return vkd3d_u32_compare(aa->index, bb->index); --} -- --void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) --{ -- unsigned int i; -- -- for (i = 0; i < count; ++i) -- vkd3d_free(extern_resources[i].name); -- vkd3d_free(extern_resources); --} -- --static const char *string_skip_tag(const char *string) --{ -- if (!strncmp(string, "", strlen(""))) -- return string + strlen(""); -- return string; --} -- --struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) --{ -- bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; -- struct extern_resource *extern_resources = NULL; -- const struct hlsl_ir_var *var; -- struct hlsl_buffer *buffer; -- enum hlsl_regset regset; -- size_t capacity = 0; -- char *name; -- -- *count = 0; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (separate_components) -- { -- unsigned int component_count = hlsl_type_component_count(var->data_type); -- unsigned int k, regset_offset; -- -- for (k = 0; k < component_count; ++k) -- { -- struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); -- struct vkd3d_string_buffer *name_buffer; -- -- if (!hlsl_type_is_resource(component_type)) -- continue; -- -- regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -- -- if (regset_offset > var->regs[regset].allocation_size) -- continue; -- -- if (var->objects_usage[regset][regset_offset].used) -- { -- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -- sizeof(*extern_resources)))) -- { -- 
sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- return NULL; -- } -- -- if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) -- { -- sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- return NULL; -- } -- if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) -- { -- sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- hlsl_release_string_buffer(ctx, name_buffer); -- return NULL; -- } -- hlsl_release_string_buffer(ctx, name_buffer); -- -- extern_resources[*count].var = NULL; -- extern_resources[*count].buffer = NULL; -- -- extern_resources[*count].name = name; -- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -- -- extern_resources[*count].component_type = component_type; -- -- extern_resources[*count].regset = regset; -- extern_resources[*count].id = var->regs[regset].id; -- extern_resources[*count].space = var->regs[regset].space; -- extern_resources[*count].index = var->regs[regset].index + regset_offset; -- extern_resources[*count].bind_count = 1; -- extern_resources[*count].loc = var->loc; -- -- ++*count; -- } -- } -- } -- else -- { -- unsigned int r; -- -- if (!hlsl_type_is_resource(var->data_type)) -- continue; -- -- for (r = 0; r <= HLSL_REGSET_LAST; ++r) -- { -- if (!var->regs[r].allocated) -- continue; -- -- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -- sizeof(*extern_resources)))) -- { -- sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- return NULL; -- } -- -- if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) -- { -- sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- return NULL; -- } -- -- extern_resources[*count].var = var; -- extern_resources[*count].buffer = NULL; -- -- extern_resources[*count].name = name; -- /* For some reason 5.1 resources aren't marked as -- * user-packed, but cbuffers still are. 
*/ -- extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) -- && !!var->reg_reservation.reg_type; -- -- extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); -- -- extern_resources[*count].regset = r; -- extern_resources[*count].id = var->regs[r].id; -- extern_resources[*count].space = var->regs[r].space; -- extern_resources[*count].index = var->regs[r].index; -- extern_resources[*count].bind_count = var->bind_count[r]; -- extern_resources[*count].loc = var->loc; -- -- ++*count; -- } -- } -- } -- -- LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!buffer->reg.allocated) -- continue; -- -- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -- sizeof(*extern_resources)))) -- { -- sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- return NULL; -- } -- -- if (!(name = hlsl_strdup(ctx, buffer->name))) -- { -- sm4_free_extern_resources(extern_resources, *count); -- *count = 0; -- return NULL; -- } -- -- extern_resources[*count].var = NULL; -- extern_resources[*count].buffer = buffer; -- -- extern_resources[*count].name = name; -- extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; -- -- extern_resources[*count].component_type = NULL; -- -- extern_resources[*count].regset = HLSL_REGSET_NUMERIC; -- extern_resources[*count].id = buffer->reg.id; -- extern_resources[*count].space = buffer->reg.space; -- extern_resources[*count].index = buffer->reg.index; -- extern_resources[*count].bind_count = 1; -- extern_resources[*count].loc = buffer->loc; -- -- ++*count; -- } -- -- qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -- return extern_resources; --} -- - /* For some reason, for matrices, values from default value initializers end up in different - * components than from regular initializers. Default value initializers fill the matrix in - * vertical reading order (left-to-right top-to-bottom) instead of regular reading order --- -2.45.2 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-4227858cfee7d4c1a0bf0ff9f59e45fca61.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-4227858cfee7d4c1a0bf0ff9f59e45fca61.patch deleted file mode 100644 index 93f85b19..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-4227858cfee7d4c1a0bf0ff9f59e45fca61.patch +++ /dev/null @@ -1,3662 +0,0 @@ -From 17a3d863d0d7782f3e675186fa42fbb2e3b0dab1 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sat, 11 Jan 2025 11:27:47 +1100 -Subject: [PATCH] Updated vkd3d to 4227858cfee7d4c1a0bf0ff9f59e45fca619e79d. 
- ---- - libs/vkd3d/libs/vkd3d-shader/fx.c | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 108 +-- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 18 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 181 +++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 767 +++++++++++++++--- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 222 +++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 5 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 504 ------------ - .../libs/vkd3d-shader/vkd3d_shader_main.c | 6 +- - libs/vkd3d/libs/vkd3d/state.c | 5 +- - 11 files changed, 962 insertions(+), 875 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 3795add87c7..779ffa1e156 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -610,8 +610,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - return 0; - } - -- value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; -- value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; -+ value |= (type->e.numeric.dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; -+ value |= (type->e.numeric.dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK; - -@@ -1047,13 +1047,13 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- put_u32(buffer, type->dimx); -- put_u32(buffer, type->dimy); -+ put_u32(buffer, type->e.numeric.dimx); -+ put_u32(buffer, type->e.numeric.dimy); - break; - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_MATRIX: -- put_u32(buffer, type->dimy); -- put_u32(buffer, type->dimx); -+ put_u32(buffer, type->e.numeric.dimy); -+ put_u32(buffer, type->e.numeric.dimx); - break; - case HLSL_CLASS_STRUCT: - put_u32(buffer, type->e.record.field_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 84da2fcbc9f..858186a1071 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -192,18 +192,20 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) - - unsigned int hlsl_type_minor_size(const struct hlsl_type *type) - { -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) -- return type->dimx; -+ return type->e.numeric.dimx; - else -- return type->dimy; -+ return type->e.numeric.dimy; - } - - unsigned int hlsl_type_major_size(const struct hlsl_type *type) - { -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) -- return type->dimy; -+ return type->e.numeric.dimy; - else -- return type->dimx; -+ return type->e.numeric.dimx; - } - - unsigned int hlsl_type_element_count(const struct hlsl_type *type) -@@ -211,7 +213,7 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- return type->dimx; -+ return type->e.numeric.dimx; - case HLSL_CLASS_MATRIX: - return hlsl_type_major_size(type); - case HLSL_CLASS_ARRAY: -@@ -355,14 +357,24 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->dimx : 4; -+ type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? 
type->e.numeric.dimx : 4; - break; - - case HLSL_CLASS_MATRIX: - if (hlsl_type_is_row_major(type)) -- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy); -+ { -+ if (is_sm4) -+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimy - 1) + type->e.numeric.dimx; -+ else -+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimy; -+ } - else -- type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx); -+ { -+ if (is_sm4) -+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * (type->e.numeric.dimx - 1) + type->e.numeric.dimy; -+ else -+ type->reg_size[HLSL_REGSET_NUMERIC] = 4 * type->e.numeric.dimx; -+ } - break; - - case HLSL_CLASS_ARRAY: -@@ -387,7 +399,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - { - unsigned int i; - -- type->dimx = 0; - for (i = 0; i < type->e.record.field_count; ++i) - { - struct hlsl_struct_field *field = &type->e.record.fields[i]; -@@ -399,8 +410,6 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - field->reg_offset[k] = type->reg_size[k]; - type->reg_size[k] += field->type->reg_size[k]; - } -- -- type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); - } - break; - } -@@ -483,8 +492,8 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e - } - type->class = type_class; - type->e.numeric.type = base_type; -- type->dimx = dimx; -- type->dimy = dimy; -+ type->e.numeric.dimx = dimx; -+ type->e.numeric.dimy = dimy; - hlsl_type_calculate_reg_size(ctx, type); - - list_add_tail(&ctx->types, &type->entry); -@@ -552,18 +561,19 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- VKD3D_ASSERT(index < type->dimx); -+ VKD3D_ASSERT(index < type->e.numeric.dimx); - *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); - *index_ptr = 0; - return index; - - case HLSL_CLASS_MATRIX: - { -- unsigned int y = index / type->dimx, x = index % type->dimx; -+ unsigned int y = index / type->e.numeric.dimx, x = index % type->e.numeric.dimx; - bool row_major = hlsl_type_is_row_major(type); - -- VKD3D_ASSERT(index < type->dimx * type->dimy); -- *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); -+ VKD3D_ASSERT(index < type->e.numeric.dimx * type->e.numeric.dimy); -+ *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, -+ row_major ? type->e.numeric.dimx : type->e.numeric.dimy); - *index_ptr = row_major ? x : y; - return row_major ? 
y : x; - } -@@ -861,9 +871,9 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - - case HLSL_CLASS_MATRIX: - if (hlsl_type_is_row_major(type)) -- return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); -+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx); - else -- return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); -+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimy); - - case HLSL_CLASS_ARRAY: - return type->e.array.type; -@@ -892,8 +902,6 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba - type->modifiers = basic_type->modifiers; - type->e.array.elements_count = array_size; - type->e.array.type = basic_type; -- type->dimx = basic_type->dimx; -- type->dimy = basic_type->dimy; - type->sampler_dim = basic_type->sampler_dim; - hlsl_type_calculate_reg_size(ctx, type); - -@@ -927,7 +935,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - return NULL; - type->class = HLSL_CLASS_STRUCT; - type->name = name; -- type->dimy = 1; - type->e.record.fields = fields; - type->e.record.field_count = field_count; - hlsl_type_calculate_reg_size(ctx, type); -@@ -945,8 +952,6 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; - type->class = HLSL_CLASS_TEXTURE; -- type->dimx = 4; -- type->dimy = 1; - type->sampler_dim = dim; - type->e.resource.format = format; - type->sample_count = sample_count; -@@ -963,8 +968,6 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; - type->class = HLSL_CLASS_UAV; -- type->dimx = format->dimx; -- type->dimy = 1; - type->sampler_dim = dim; - type->e.resource.format = format; - type->e.resource.rasteriser_ordered = rasteriser_ordered; -@@ -980,7 +983,6 @@ struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *forma - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; - type->class = HLSL_CLASS_CONSTANT_BUFFER; -- type->dimy = 1; - type->e.resource.format = format; - hlsl_type_calculate_reg_size(ctx, type); - list_add_tail(&ctx->types, &type->entry); -@@ -1066,7 +1068,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -- return type->dimx * type->dimy; -+ return type->e.numeric.dimx * type->e.numeric.dimy; - - case HLSL_CLASS_STRUCT: - { -@@ -1131,9 +1133,9 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) - != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) - return false; -- if (t1->dimx != t2->dimx) -+ if (t1->e.numeric.dimx != t2->e.numeric.dimx) - return false; -- if (t1->dimy != t2->dimy) -+ if (t1->e.numeric.dimy != t2->e.numeric.dimy) - return false; - return true; - -@@ -1224,8 +1226,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - } - } - type->class = old->class; -- type->dimx = old->dimx; -- type->dimy = old->dimy; - type->modifiers = old->modifiers | modifiers; - if (!(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) - type->modifiers |= default_majority; -@@ -1238,6 +1238,8 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -+ type->e.numeric.dimx = 
old->e.numeric.dimx; -+ type->e.numeric.dimy = old->e.numeric.dimy; - type->e.numeric.type = old->e.numeric.type; - break; - -@@ -1497,7 +1499,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls - hlsl_src_from_node(&store->rhs, rhs); - - if (!writemask && type_is_single_reg(rhs->data_type)) -- writemask = (1 << rhs->data_type->dimx) - 1; -+ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; - store->writemask = writemask; - - return &store->node; -@@ -1524,7 +1526,7 @@ bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - hlsl_src_from_node(&store->rhs, rhs); - - if (type_is_single_reg(rhs->data_type)) -- store->writemask = (1 << rhs->data_type->dimx) - 1; -+ store->writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; - - hlsl_block_add_instr(block, &store->node); - -@@ -2064,7 +2066,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v - if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) - type = type->e.resource.format; - else if (type->class == HLSL_CLASS_MATRIX) -- type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); -+ type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->e.numeric.dimx); - else - type = hlsl_get_element_type_from_path_index(ctx, type, idx); - -@@ -2355,10 +2357,10 @@ static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, - struct clone_instr_map *map, struct hlsl_ir_swizzle *src) - { - if (src->val.node->data_type->class == HLSL_CLASS_MATRIX) -- return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->dimx, -+ return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->e.numeric.dimx, - map_instr(map, src->val.node), &src->node.loc); - else -- return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->dimx, -+ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->e.numeric.dimx, - map_instr(map, src->val.node), &src->node.loc); - } - -@@ -2778,12 +2780,13 @@ static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl - - case HLSL_CLASS_VECTOR: - VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->dimx); -+ vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->e.numeric.dimx); - return; - - case HLSL_CLASS_MATRIX: - VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); -+ vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], -+ type->e.numeric.dimy, type->e.numeric.dimx); - return; - - case HLSL_CLASS_ARRAY: -@@ -3176,9 +3179,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl - struct hlsl_type *type = constant->node.data_type; - unsigned int x; - -- if (type->dimx != 1) -+ if (type->e.numeric.dimx != 1) - vkd3d_string_buffer_printf(buffer, "{"); -- for (x = 0; x < type->dimx; ++x) -+ for (x = 0; x < type->e.numeric.dimx; ++x) - { - const union hlsl_constant_value_component *value = &constant->value.u[x]; - -@@ -3204,12 +3207,9 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl - case HLSL_TYPE_UINT: - vkd3d_string_buffer_printf(buffer, "%u ", value->u); - break; -- -- default: -- vkd3d_unreachable(); - } - } -- if (type->dimx != 1) -+ if (type->e.numeric.dimx != 1) - vkd3d_string_buffer_printf(buffer, "}"); - } - -@@ -3435,16 
+3435,17 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls - unsigned int i; - - dump_src(buffer, &swizzle->val); -- if (swizzle->val.node->data_type->dimy > 1) -+ if (swizzle->val.node->data_type->e.numeric.dimy > 1) - { - vkd3d_string_buffer_printf(buffer, "."); -- for (i = 0; i < swizzle->node.data_type->dimx; ++i) -+ for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) - vkd3d_string_buffer_printf(buffer, "_m%u%u", - swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x); - } - else - { -- vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->dimx)); -+ vkd3d_string_buffer_printf(buffer, "%s", -+ debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->e.numeric.dimx)); - } - } - -@@ -3658,10 +3659,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) - - void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) - { -+ const struct hlsl_type *old_type = old->data_type, *new_type = new->data_type; - struct hlsl_src *src, *next; - -- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx); -- VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy); -+ if (hlsl_is_numeric_type(old_type)) -+ { -+ VKD3D_ASSERT(hlsl_is_numeric_type(new_type)); -+ VKD3D_ASSERT(old_type->e.numeric.dimx == new_type->e.numeric.dimx); -+ VKD3D_ASSERT(old_type->e.numeric.dimy == new_type->e.numeric.dimy); -+ } - - LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) - { -@@ -4331,7 +4337,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - } - - ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); -- ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); -+ ctx->builtin_types.null = hlsl_new_simple_type(ctx, "NULL", HLSL_CLASS_NULL); - ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); - ctx->builtin_types.error = hlsl_new_simple_type(ctx, "", HLSL_CLASS_ERROR); - hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index b0e2b54c348..d712a325322 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -169,16 +169,6 @@ struct hlsl_type - * Modifiers that don't fall inside this mask are to be stored in the variable in - * hlsl_ir_var.modifiers, or in the struct field in hlsl_ir_field.modifiers. */ - uint32_t modifiers; -- /* Size of the type values on each dimension. For non-numeric types, they are set for the -- * convenience of the sm1/sm4 backends. -- * If type is HLSL_CLASS_SCALAR, then both dimx = 1 and dimy = 1. -- * If type is HLSL_CLASS_VECTOR, then dimx is the size of the vector, and dimy = 1. -- * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. -- * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. -- * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. -- */ -- unsigned int dimx; -- unsigned int dimy; - /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ - unsigned int sample_count; - -@@ -188,6 +178,10 @@ struct hlsl_type - struct - { - enum hlsl_base_type type; -+ /* For scalars, dimx == dimy == 1. 
-+ * For vectors, dimx == vector width; dimy == 1. -+ * For matrices, dimx == column count; dimy == row count. */ -+ unsigned int dimx, dimy; - } numeric; - /* Additional information if type is HLSL_CLASS_STRUCT. */ - struct -@@ -1684,10 +1678,6 @@ struct extern_resource - struct vkd3d_shader_location loc; - }; - --struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count); --void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count); --void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef); -- - struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index dbed11cd8b3..e6eaac78994 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -247,18 +247,19 @@ static bool type_contains_only_numerics(const struct hlsl_type *type) - - static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) - { -- if (hlsl_is_numeric_type(src) && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) -+ if (hlsl_is_numeric_type(src) && src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1 -+ && type_contains_only_numerics(dst)) - return true; - - if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX -- && src->dimx >= dst->dimx && src->dimy >= dst->dimy) -+ && src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy) - return true; - -- if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) -+ if ((src->class == HLSL_CLASS_MATRIX && src->e.numeric.dimx > 1 && src->e.numeric.dimy > 1) - && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) - return false; - -- if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) -+ if ((dst->class == HLSL_CLASS_MATRIX && dst->e.numeric.dimy > 1) - && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) - return false; - -@@ -273,16 +274,16 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - if (hlsl_is_numeric_type(src)) - { - /* Scalar vars can be converted to any other numeric data type */ -- if (src->dimx == 1 && src->dimy == 1) -+ if (src->e.numeric.dimx == 1 && src->e.numeric.dimy == 1) - return true; - /* The other way around is true too */ -- if (dst->dimx == 1 && dst->dimy == 1) -+ if (dst->e.numeric.dimx == 1 && dst->e.numeric.dimy == 1) - return true; - - if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) - { - if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) -- return src->dimx >= dst->dimx && src->dimy >= dst->dimy; -+ return src->e.numeric.dimx >= dst->e.numeric.dimx && src->e.numeric.dimy >= dst->e.numeric.dimy; - - /* Matrix-vector conversion is apparently allowed if they have - * the same components count, or if the matrix is 1xN or Nx1 -@@ -292,8 +293,8 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) - return true; - -- if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && -- (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) -+ if ((src->class == HLSL_CLASS_VECTOR || src->e.numeric.dimx == 1 || src->e.numeric.dimy 
== 1) -+ && (dst->class == HLSL_CLASS_VECTOR || dst->e.numeric.dimx == 1 || dst->e.numeric.dimy == 1)) - return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); - } - -@@ -301,7 +302,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - } - else - { -- return src->dimx >= dst->dimx; -+ return src->e.numeric.dimx >= dst->e.numeric.dimx; - } - } - -@@ -335,7 +336,7 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node - if (type->class == HLSL_CLASS_ERROR) - return; - -- if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1) -+ if (type->class > HLSL_CLASS_LAST_NUMERIC || type->e.numeric.dimx > 1 || type->e.numeric.dimy > 1) - { - struct vkd3d_string_buffer *string; - -@@ -368,14 +369,14 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl - struct hlsl_ir_var *var; - unsigned int dst_idx; - -- broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; -+ broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1; - matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; - VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); - if (matrix_cast) - { -- VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); -- VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); -+ VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx); -+ VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy); - } - - if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) -@@ -395,9 +396,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl - } - else if (matrix_cast) - { -- unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx; -+ unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx; - -- src_idx = y * src_type->dimx + x; -+ src_idx = y * src_type->e.numeric.dimx + x; - } - else - { -@@ -458,7 +459,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - return NULL; - } - -- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) -+ if (hlsl_is_numeric_type(dst_type) && hlsl_is_numeric_type(src_type) -+ && dst_type->e.numeric.dimx * dst_type->e.numeric.dimy < src_type->e.numeric.dimx * src_type->e.numeric.dimy -+ && ctx->warn_implicit_truncation) - hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->class == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); - -@@ -874,7 +877,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - x = swizzle[i + 2] - '1'; - } - -- if (x >= value->data_type->dimx || y >= value->data_type->dimy) -+ if (x >= value->data_type->e.numeric.dimx || y >= value->data_type->e.numeric.dimy) - return NULL; - s.components[component].x = x; - s.components[component].y = y; -@@ -907,7 +910,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - break; - } - -- if (s >= value->data_type->dimx) -+ if (s >= value->data_type->e.numeric.dimx) - return NULL; - hlsl_swizzle_set_component(&swiz, component++, s); - } -@@ -1021,7 +1024,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str - { - unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); - -- if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) -+ if (index_type->class > HLSL_CLASS_VECTOR || index_type->e.numeric.dimx != dim_count) - { - struct vkd3d_string_buffer *string; - -@@ -1574,7 +1577,7 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { - /* Scalar vars can be converted to pretty much everything */ -- if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) -+ if ((t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) || (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1)) - return true; - - if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) -@@ -1589,13 +1592,13 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t - if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) - return true; - -- return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) -- || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); -+ return (t1->class == HLSL_CLASS_MATRIX && (t1->e.numeric.dimx == 1 || t1->e.numeric.dimy == 1)) -+ || (t2->class == HLSL_CLASS_MATRIX && (t2->e.numeric.dimx == 1 || t2->e.numeric.dimy == 1)); - } - - /* Both matrices */ -- if ((t1->dimx >= t2->dimx && t1->dimy >= t2->dimy) -- || (t1->dimx <= t2->dimx && t1->dimy <= t2->dimy)) -+ if ((t1->e.numeric.dimx >= t2->e.numeric.dimx && t1->e.numeric.dimy >= t2->e.numeric.dimy) -+ || (t1->e.numeric.dimx <= t2->e.numeric.dimx && t1->e.numeric.dimy <= t2->e.numeric.dimy)) - return true; - } - -@@ -1655,37 +1658,37 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct - return false; - } - -- if (t1->dimx == 1 && t1->dimy == 1) -+ if (t1->e.numeric.dimx == 1 && t1->e.numeric.dimy == 1) - { - *type = t2->class; -- *dimx = t2->dimx; -- *dimy = t2->dimy; -+ *dimx = t2->e.numeric.dimx; -+ *dimy = t2->e.numeric.dimy; - } -- else if (t2->dimx == 1 && t2->dimy == 1) -+ else if (t2->e.numeric.dimx == 1 && t2->e.numeric.dimy == 1) - { - *type = t1->class; -- *dimx = t1->dimx; -- *dimy = t1->dimy; -+ *dimx = t1->e.numeric.dimx; -+ *dimy = t1->e.numeric.dimy; - } - else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) - { - *type = HLSL_CLASS_MATRIX; -- *dimx = min(t1->dimx, t2->dimx); -- *dimy = min(t1->dimy, t2->dimy); -+ *dimx = min(t1->e.numeric.dimx, t2->e.numeric.dimx); -+ *dimy = min(t1->e.numeric.dimy, t2->e.numeric.dimy); - } - else - { -- if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) -+ if (t1->e.numeric.dimx * t1->e.numeric.dimy <= t2->e.numeric.dimx * t2->e.numeric.dimy) - { - *type = 
t1->class; -- *dimx = t1->dimx; -- *dimy = t1->dimy; -+ *dimx = t1->e.numeric.dimx; -+ *dimy = t1->e.numeric.dimy; - } - else - { - *type = t2->class; -- *dimx = t2->dimx; -- *dimy = t2->dimy; -+ *dimx = t2->e.numeric.dimx; -+ *dimy = t2->e.numeric.dimy; - } - } - -@@ -1713,7 +1716,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl - return NULL; - hlsl_init_simple_deref_from_var(&var_deref, var); - -- for (i = 0; i < type->dimy * type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimy * type->e.numeric.dimx; ++i) - { - struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; - struct hlsl_block store_block; -@@ -1816,7 +1819,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - return arg; - - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, -- arg->data_type->dimx, arg->data_type->dimy); -+ arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); - - if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) - return NULL; -@@ -1979,11 +1982,11 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls - } - - if (arg1->data_type->class == HLSL_CLASS_SCALAR) -- dim = arg2->data_type->dimx; -+ dim = arg2->data_type->e.numeric.dimx; - else if (arg2->data_type->class == HLSL_CLASS_SCALAR) -- dim = arg1->data_type->dimx; -+ dim = arg1->data_type->e.numeric.dimx; - else -- dim = min(arg1->data_type->dimx, arg2->data_type->dimx); -+ dim = min(arg1->data_type->e.numeric.dimx, arg2->data_type->e.numeric.dimx); - - if (dim == 1) - op = HLSL_OP2_MUL; -@@ -2187,8 +2190,8 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - - if (hlsl_is_numeric_type(lhs_type)) - { -- writemask = (1 << lhs_type->dimx) - 1; -- width = lhs_type->dimx; -+ writemask = (1 << lhs_type->e.numeric.dimx) - 1; -+ width = lhs_type->e.numeric.dimx; - } - - if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) -@@ -2275,13 +2278,13 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - -- if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) -+ if (width != resource_type->e.resource.format->e.numeric.dimx * resource_type->e.resource.format->e.numeric.dimy) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - - VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); - VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -- VKD3D_ASSERT(coords->data_type->dimx == dim_count); -+ VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); - - if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) - { -@@ -2298,14 +2301,14 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - - hlsl_init_deref_from_index_chain(ctx, &deref, lhs); - -- for (i = 0; i < lhs->data_type->dimy; ++i) -+ for (i = 0; i < lhs->data_type->e.numeric.dimy; ++i) - { -- for (j = 0; j < lhs->data_type->dimx; ++j) -+ for (j = 0; j < lhs->data_type->e.numeric.dimx; ++j) - { - struct hlsl_ir_node *load; - struct hlsl_block store_block; - const unsigned int idx = i * 4 + j; -- const unsigned int component = i * lhs->data_type->dimx + j; -+ const unsigned int component = i * lhs->data_type->e.numeric.dimx + j; - - if (!(writemask & (1 << idx))) - continue; 
-@@ -2335,7 +2338,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - - VKD3D_ASSERT(!matrix_writemask); - -- for (i = 0; i < mat->data_type->dimx; ++i) -+ for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) - { - struct hlsl_ir_node *cell, *load, *store, *c; - struct hlsl_deref deref; -@@ -3333,7 +3336,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - if (!type_is_integer(type->e.numeric.type)) - return arg; - -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); - return add_implicit_conversion(ctx, params->instrs, arg, type, loc); - } - -@@ -3372,13 +3375,13 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * - if (arg_type->class == HLSL_CLASS_VECTOR) - { - vectors = true; -- dimx = min(dimx, arg_type->dimx); -+ dimx = min(dimx, arg_type->e.numeric.dimx); - } - else if (arg_type->class == HLSL_CLASS_MATRIX) - { - matrices = true; -- dimx = min(dimx, arg_type->dimx); -- dimy = min(dimy, arg_type->dimy); -+ dimx = min(dimx, arg_type->e.numeric.dimx); -+ dimy = min(dimy, arg_type->e.numeric.dimy); - } - } - -@@ -3423,7 +3426,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - if (type_is_integer(type->e.numeric.type)) -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->e.numeric.dimx, type->e.numeric.dimy); - - return convert_args(ctx, params, type, loc); - } -@@ -3436,7 +3439,7 @@ static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); - - return convert_args(ctx, params, type, loc); - } -@@ -3503,7 +3506,7 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, - static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) - { -- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -+ return hlsl_get_numeric_type(ctx, type->class, base_type, type->e.numeric.dimx, type->e.numeric.dimy); - } - - static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -4024,7 +4027,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, - if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) - return false; - -- dim = min(type->dimx, type->dimy); -+ dim = min(type->e.numeric.dimx, type->e.numeric.dimy); - if (dim == 1) - return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); - -@@ -4108,7 +4111,7 @@ static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - type = params->args[0]->data_type; - if (!(type->class == HLSL_CLASS_SCALAR -- || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4))) -+ || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 4))) - { - struct vkd3d_string_buffer *string; - if ((string = hlsl_type_to_string(ctx, type))) -@@ -4540,15 +4543,15 @@ static bool intrinsic_mul(struct 
hlsl_ctx *ctx, - if (arg1->data_type->class == HLSL_CLASS_VECTOR) - { - vect_count++; -- cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); -+ cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->e.numeric.dimx, 1); - } - if (arg2->data_type->class == HLSL_CLASS_VECTOR) - { - vect_count++; -- cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); -+ cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->e.numeric.dimx); - } - -- matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->dimx, cast_type1->dimy); -+ matrix_type = hlsl_get_matrix_type(ctx, base, cast_type2->e.numeric.dimx, cast_type1->e.numeric.dimy); - - if (vect_count == 0) - { -@@ -4556,12 +4559,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - } - else if (vect_count == 1) - { -- VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); -- ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); -+ VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 || matrix_type->e.numeric.dimy == 1); -+ ret_type = hlsl_get_vector_type(ctx, base, matrix_type->e.numeric.dimx * matrix_type->e.numeric.dimy); - } - else - { -- VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); -+ VKD3D_ASSERT(matrix_type->e.numeric.dimx == 1 && matrix_type->e.numeric.dimy == 1); - ret_type = hlsl_get_scalar_type(ctx, base); - } - -@@ -4575,23 +4578,23 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - return false; - hlsl_init_simple_deref_from_var(&var_deref, var); - -- for (i = 0; i < matrix_type->dimx; ++i) -+ for (i = 0; i < matrix_type->e.numeric.dimx; ++i) - { -- for (j = 0; j < matrix_type->dimy; ++j) -+ for (j = 0; j < matrix_type->e.numeric.dimy; ++j) - { - struct hlsl_ir_node *instr = NULL; - struct hlsl_block block; - -- for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) -+ for (k = 0; k < cast_type1->e.numeric.dimx && k < cast_type2->e.numeric.dimy; ++k) - { - struct hlsl_ir_node *value1, *value2, *mul; - - if (!(value1 = hlsl_add_load_component(ctx, params->instrs, -- cast1, j * cast1->data_type->dimx + k, loc))) -+ cast1, j * cast1->data_type->e.numeric.dimx + k, loc))) - return false; - - if (!(value2 = hlsl_add_load_component(ctx, params->instrs, -- cast2, k * cast2->data_type->dimx + i, loc))) -+ cast2, k * cast2->data_type->e.numeric.dimx + i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) -@@ -4608,7 +4611,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - } - } - -- if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->e.numeric.dimx + i, instr)) - return false; - hlsl_block_add_block(params->instrs, &block); - } -@@ -4801,7 +4804,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - static const struct hlsl_constant_value zero_value; - - struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, -- arg->data_type->dimx, arg->data_type->dimy); -+ arg->data_type->e.numeric.dimx, arg->data_type->e.numeric.dimy); - - if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) - return false; -@@ -5255,22 +5258,23 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - return true; - } - -- mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); -+ mat_type = hlsl_get_matrix_type(ctx, 
arg_type->e.numeric.type, arg_type->e.numeric.dimy, arg_type->e.numeric.dimx); - - if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) - return false; - hlsl_init_simple_deref_from_var(&var_deref, var); - -- for (i = 0; i < arg_type->dimx; ++i) -+ for (i = 0; i < arg_type->e.numeric.dimx; ++i) - { -- for (j = 0; j < arg_type->dimy; ++j) -+ for (j = 0; j < arg_type->e.numeric.dimy; ++j) - { - struct hlsl_block block; - -- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, -+ j * arg->data_type->e.numeric.dimx + i, loc))) - return false; - -- if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->e.numeric.dimx + j, load)) - return false; - hlsl_block_add_block(params->instrs, &block); - } -@@ -5300,7 +5304,8 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; - struct hlsl_type *arg_type = arg->data_type; - -- if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) -+ if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR -+ && arg_type->e.numeric.dimx == 4)) - { - struct vkd3d_string_buffer *string; - -@@ -5663,6 +5668,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - hlsl_error(ctx, &cond->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Ternary condition type '%s' is not numeric.", string->buffer); - hlsl_release_string_buffer(ctx, string); -+ return false; - } - - if (first->data_type->class <= HLSL_CLASS_LAST_NUMERIC -@@ -5671,21 +5677,22 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - if (!(common_type = get_common_numeric_type(ctx, first, second, &first->loc))) - return false; - -- if (cond_type->dimx == 1 && cond_type->dimy == 1) -+ if (cond_type->e.numeric.dimx == 1 && cond_type->e.numeric.dimy == 1) - { - cond_type = hlsl_get_numeric_type(ctx, common_type->class, -- HLSL_TYPE_BOOL, common_type->dimx, common_type->dimy); -+ HLSL_TYPE_BOOL, common_type->e.numeric.dimx, common_type->e.numeric.dimy); - if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) - return false; - } - else - { -- if (common_type->dimx == 1 && common_type->dimy == 1) -+ if (common_type->e.numeric.dimx == 1 && common_type->e.numeric.dimy == 1) - { - common_type = hlsl_get_numeric_type(ctx, cond_type->class, -- common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); -+ common_type->e.numeric.type, cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); - } -- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) -+ else if (cond_type->e.numeric.dimx != common_type->e.numeric.dimx -+ || cond_type->e.numeric.dimy != common_type->e.numeric.dimy) - { - /* This condition looks wrong but is correct. - * floatN is compatible with float1xN, but not with floatNx1. 
*/ -@@ -5703,7 +5710,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - } - - cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, -- common_type->dimx, common_type->dimy); -+ common_type->e.numeric.dimx, common_type->e.numeric.dimy); - if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) - return false; - } -@@ -5731,7 +5738,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - } - - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -- cond_type->dimx, cond_type->dimy); -+ cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); - if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) - return false; - -@@ -6103,7 +6110,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return false; - } - -- if (read_channel >= object_type->e.resource.format->dimx) -+ if (read_channel >= object_type->e.resource.format->e.numeric.dimx) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Method %s() requires at least %u channels.", name, read_channel + 1); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 4ccbed78f38..c666599b342 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -20,9 +20,13 @@ - - #include "hlsl.h" - #include "vkd3d_shader_private.h" -+#include "d3dcommon.h" - #include - #include - -+/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ -+#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 -+ - /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ - static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_type *type, struct hlsl_ir_node *base_offset, struct hlsl_ir_node *idx, -@@ -270,7 +274,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - if (ctx->profile->major_version < 4) - return true; - -- if (type1->dimx != type2->dimx) -+ if (type1->e.numeric.dimx != type2->e.numeric.dimx) - return false; - - return base_type_get_semantic_equivalent(type1->e.numeric.type) -@@ -292,6 +296,9 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - { - if (!ascii_strcasecmp(ext_var->name, new_name)) - { -+ VKD3D_ASSERT(ext_var->data_type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR); -+ - if (output) - { - if (index >= semantic->reported_duplicated_output_next_index) -@@ -1032,7 +1039,7 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * - static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, - const struct vkd3d_shader_location *loc) - { -- unsigned int dim_count = index->data_type->dimx; -+ unsigned int dim_count = index->data_type->e.numeric.dimx; - struct hlsl_ir_node *store, *zero; - struct hlsl_ir_load *coords_load; - struct hlsl_deref coords_deref; -@@ -1089,12 +1096,12 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - return false; - hlsl_init_simple_deref_from_var(&var_deref, var); - -- for (i = 0; i < instr->data_type->dimx; ++i) -+ for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) - { - struct hlsl_block store_block; - struct hlsl_ir_node *load; - -- k = swizzle->u.matrix.components[i].y * matrix_type->dimx + swizzle->u.matrix.components[i].x; -+ k 
= swizzle->u.matrix.components[i].y * matrix_type->e.numeric.dimx + swizzle->u.matrix.components[i].x; - - if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) - return false; -@@ -1139,7 +1146,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - - VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); - VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -- VKD3D_ASSERT(coords->data_type->dimx == dim_count); -+ VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); - - if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) - return false; -@@ -1175,7 +1182,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - hlsl_init_simple_deref_from_var(&row_deref, var); - -- for (i = 0; i < mat->data_type->dimx; ++i) -+ for (i = 0; i < mat->data_type->e.numeric.dimx; ++i) - { - struct hlsl_ir_node *c; - -@@ -1224,7 +1231,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s - src_type = cast->operands[0].node->data_type; - dst_type = cast->node.data_type; - -- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) -+ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1) - { - struct hlsl_ir_node *new_cast, *swizzle; - -@@ -1235,9 +1242,10 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s - return false; - hlsl_block_add_instr(block, new_cast); - -- if (dst_type->dimx != 1) -+ if (dst_type->e.numeric.dimx != 1) - { -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, new_cast, &cast->node.loc))) -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), -+ dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) - return false; - hlsl_block_add_instr(block, swizzle); - } -@@ -2092,10 +2100,10 @@ static enum validation_result validate_component_index_range_from_deref(struct h - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- if (idx >= type->dimx) -+ if (idx >= type->e.numeric.dimx) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Vector index is out of bounds. %u/%u", idx, type->dimx); -+ "Vector index is out of bounds. 
%u/%u", idx, type->e.numeric.dimx); - return DEREF_VALIDATION_OUT_OF_BOUNDS; - } - break; -@@ -2226,7 +2234,7 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - - static bool is_vec1(const struct hlsl_type *type) - { -- return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); -+ return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->e.numeric.dimx == 1); - } - - static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2403,18 +2411,20 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - src_type = cast->operands[0].node->data_type; - dst_type = cast->node.data_type; - -- if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) -+ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR -+ && dst_type->e.numeric.dimx < src_type->e.numeric.dimx) - { - struct hlsl_ir_node *new_cast, *swizzle; - -- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); -+ dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx); - /* We need to preserve the cast since it might be doing more than just - * narrowing the vector. */ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) - return false; - hlsl_block_add_instr(block, new_cast); - -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), -+ dst_type->e.numeric.dimx, new_cast, &cast->node.loc))) - return false; - hlsl_block_add_instr(block, swizzle); - -@@ -2441,10 +2451,11 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - uint32_t combined_swizzle; - - combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, -- swizzle->u.vector, instr->data_type->dimx); -+ swizzle->u.vector, instr->data_type->e.numeric.dimx); - next_instr = hlsl_ir_swizzle(next_instr)->val.node; - -- if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) -+ if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, -+ instr->data_type->e.numeric.dimx, next_instr, &instr->loc))) - return false; - - list_add_before(&instr->entry, &new_swizzle->entry); -@@ -2464,10 +2475,10 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i - return false; - swizzle = hlsl_ir_swizzle(instr); - -- if (instr->data_type->dimx != swizzle->val.node->data_type->dimx) -+ if (instr->data_type->e.numeric.dimx != swizzle->val.node->data_type->e.numeric.dimx) - return false; - -- for (i = 0; i < instr->data_type->dimx; ++i) -+ for (i = 0; i < instr->data_type->e.numeric.dimx; ++i) - if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i) - return false; - -@@ -2628,6 +2639,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) - { - struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; -+ unsigned int width = type->e.numeric.dimx; - struct hlsl_constant_value value; - struct hlsl_ir_load *vector_load; - enum hlsl_ir_expr_op op; -@@ -2636,7 +2648,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - 
return false; - hlsl_block_add_instr(block, &vector_load->node); - -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc))) - return false; - hlsl_block_add_instr(block, swizzle); - -@@ -2644,14 +2656,14 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - value.u[1].u = 1; - value.u[2].u = 2; - value.u[3].u = 3; -- if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) -+ if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc))) - return false; - hlsl_block_add_instr(block, c); - - operands[0] = swizzle; - operands[1] = c; - if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, -- hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) -+ hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, width), &instr->loc))) - return false; - hlsl_block_add_instr(block, eq); - -@@ -2660,7 +2672,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - hlsl_block_add_instr(block, eq); - - op = HLSL_OP2_DOT; -- if (type->dimx == 1) -+ if (width == 1) - op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; - - /* Note: We may be creating a DOT for bool vectors here, which we need to lower to -@@ -2787,7 +2799,8 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n - return false; - hlsl_block_add_instr(block, equals); - -- if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) -+ if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), -+ var->data_type->e.numeric.dimx, equals, &cut_index->loc))) - return false; - hlsl_block_add_instr(block, equals); - -@@ -3176,7 +3189,7 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h - arg2 = expr->operands[1].node; - if (expr->op != HLSL_OP2_DOT) - return false; -- if (arg1->data_type->dimx != 2) -+ if (arg1->data_type->e.numeric.dimx != 2) - return false; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -3200,11 +3213,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h - return false; - hlsl_block_add_instr(block, mul); - -- if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) -+ if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), -+ instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) - return false; - hlsl_block_add_instr(block, add_x); - -- if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) -+ if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), -+ instr->data_type->e.numeric.dimx, mul, &expr->node.loc))) - return false; - hlsl_block_add_instr(block, add_y); - -@@ -3368,7 +3383,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - type = arg->data_type; - - /* Reduce the range of the input angles to [-pi, pi]. 
*/ -- for (i = 0; i < type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimx; ++i) - { - half_value.u[i].f = 0.5; - two_pi_value.u[i].f = 2.0 * M_PI; -@@ -3396,7 +3411,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return false; - hlsl_block_add_instr(block, reduced); - -- if (type->dimx == 1) -+ if (type->e.numeric.dimx == 1) - { - if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) - return false; -@@ -3409,7 +3424,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - struct hlsl_deref var_deref; - struct hlsl_ir_load *var_load; - -- for (i = 0; i < type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimx; ++i) - { - uint32_t s = hlsl_swizzle_from_writemask(1 << i); - -@@ -3422,7 +3437,7 @@ static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return false; - hlsl_init_simple_deref_from_var(&var_deref, var); - -- for (i = 0; i < type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimx; ++i) - { - struct hlsl_block store_block; - -@@ -3458,7 +3473,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st - return false; - - arg = expr->operands[0].node; -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); - - /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ - VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); -@@ -3520,7 +3535,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, -- instr->data_type->dimx, instr->data_type->dimy); -+ instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy); - - if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) - return false; -@@ -3604,7 +3619,7 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node - - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); - - if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) - return false; -@@ -3730,7 +3745,7 @@ static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h - - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); - - if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) - return false; -@@ -3790,7 +3805,7 @@ static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct h - if (expr->op != HLSL_OP3_CMP) - return false; - -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); - - for (i = 0; i < 3; ++i) - { -@@ -3860,7 +3875,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - - /* Narrowing casts should have already been lowered. 
*/ -- VKD3D_ASSERT(type->dimx == arg_type->dimx); -+ VKD3D_ASSERT(type->e.numeric.dimx == arg_type->e.numeric.dimx); - - zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); - if (!zero) -@@ -3886,7 +3901,8 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc - - if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) - { -- cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -+ cond_type->e.numeric.dimx, cond_type->e.numeric.dimy); - - if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) - return NULL; -@@ -3922,13 +3938,13 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - if (type->e.numeric.type != HLSL_TYPE_INT) - return false; -- utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); - - if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) - return false; - hlsl_block_add_instr(block, xor); - -- for (i = 0; i < type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimx; ++i) - high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; -@@ -3988,9 +4004,9 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - if (type->e.numeric.type != HLSL_TYPE_INT) - return false; -- utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy); - -- for (i = 0; i < type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimx; ++i) - high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; -@@ -4081,8 +4097,8 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - { - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; -- VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); -- dimx = arg1->data_type->dimx; -+ VKD3D_ASSERT(arg1->data_type->e.numeric.dimx == arg2->data_type->e.numeric.dimx); -+ dimx = arg1->data_type->e.numeric.dimx; - is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; - - if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) -@@ -4131,7 +4147,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - if (type->e.numeric.type != HLSL_TYPE_FLOAT) - return false; -- btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); -+ btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy); - - if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) - return false; -@@ -4153,7 +4169,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) - return false; - -- for (i = 0; i < type->dimx; ++i) -+ for (i = 0; i < type->e.numeric.dimx; ++i) - one_value.u[i].f = 1.0f; - if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) - return false; -@@ -4211,7 +4227,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - if (!arg) - continue; - -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx); - if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, arg_cast); -@@ -4219,7 +4235,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - operands[i] = arg_cast; - } - -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx); - if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) - return false; - hlsl_block_add_instr(block, float_expr); -@@ -4260,7 +4276,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - - operands[0] = jump->condition.node; - operands[1] = zero; -- cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); -+ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, -+ arg_type->e.numeric.dimx, arg_type->e.numeric.dimy); - if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) - return false; - hlsl_block_add_instr(&block, cmp); -@@ -4304,7 +4321,7 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v - return false; - - cond = jump->condition.node; -- float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->e.numeric.dimx); - - hlsl_block_init(&block); - -@@ -4900,7 +4917,8 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ - - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); -+ return allocate_register(ctx, allocator, first_write, last_read, -+ type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false); - else - return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); - } -@@ -5227,13 +5245,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); -- VKD3D_ASSERT(type->dimy == 1); -+ VKD3D_ASSERT(type->e.numeric.dimy == 1); - VKD3D_ASSERT(constant->reg.writemask); - - for (x = 0, i = 0; x < 4; ++x) - { - const union hlsl_constant_value_component *value; -- float f; -+ float f = 0; - - if (!(constant->reg.writemask & (1u << x))) - continue; -@@ -5261,9 +5279,6 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - case HLSL_TYPE_DOUBLE: - FIXME("Double constant.\n"); - return; -- -- default: -- vkd3d_unreachable(); - } - - record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); -@@ -5608,13 +5623,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - { - int mode = (ctx->profile->major_version < 4) - ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -- unsigned int reg_size = optimize ? var->data_type->dimx : 4; -+ unsigned int reg_size = optimize ? var->data_type->e.numeric.dimx : 4; - - if (special_interpolation) - mode = VKD3DSIM_NONE; - - var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, -- reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); -+ reg_size, var->data_type->e.numeric.dimx, mode, var->force_align, vip_allocation); - - TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', - var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); -@@ -6068,7 +6083,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- if (idx >= type->dimx) -+ if (idx >= type->e.numeric.dimx) - return false; - *start += idx; - break; -@@ -6077,9 +6092,9 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - if (idx >= hlsl_type_major_size(type)) - return false; - if (hlsl_type_is_row_major(type)) -- *start += idx * type->dimx; -+ *start += idx * type->e.numeric.dimx; - else -- *start += idx * type->dimy; -+ *start += idx * type->e.numeric.dimy; - break; - - case HLSL_CLASS_ARRAY: -@@ -6668,8 +6683,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, - struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) - { -+ enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID; - enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -- enum vkd3d_shader_component_type component_type; - unsigned int register_index, mask, use_mask; - const char *name = var->semantic.name; - enum vkd3d_shader_register_type type; -@@ -6689,7 +6704,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) - { - register_index = has_idx ? 
var->semantic.index : ~0u; -- mask = (1u << var->data_type->dimx) - 1; -+ mask = (1u << var->data_type->e.numeric.dimx) - 1; - } - else - { -@@ -6716,12 +6731,11 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - component_type = VKD3D_SHADER_COMPONENT_UINT; - break; - -- default: -+ case HLSL_TYPE_DOUBLE: - if ((string = hlsl_type_to_string(ctx, var->data_type))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); -- component_type = VKD3D_SHADER_COMPONENT_VOID; - break; - } - -@@ -6757,19 +6771,19 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - sysval = VKD3D_SHADER_SV_POSITION; - } - -- mask = (1 << var->data_type->dimx) - 1; -+ mask = (1 << var->data_type->e.numeric.dimx) - 1; - - if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output - && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) - { -- if (var->data_type->dimx > 1) -+ if (var->data_type->e.numeric.dimx > 1) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "PSIZE output must have only 1 component in this shader model."); - /* For some reason the writemask has all components set. */ - mask = VKD3DSP_WRITEMASK_ALL; - } - if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 -- && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) -+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->e.numeric.dimx > 1) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "FOG output must have only 1 component in this shader model."); - -@@ -7058,7 +7072,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, - /* In SM4 constants are inlined */ - constant = hlsl_ir_constant(instr); - vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, -- vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); -+ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->e.numeric.dimx, map_writemask); - } - else - { -@@ -7203,7 +7217,7 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - reg->idx[1].offset = offset / 4; - reg->idx_count = 2; - } -- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); -+ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3); - } - } - else if (var->is_input_semantic) -@@ -7224,7 +7238,7 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; -- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4); - } - else - { -@@ -7260,7 +7274,7 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; -- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4); - } - else - { -@@ -7487,7 +7501,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - dst_type = instr->data_type; - - /* Narrowing casts were already lowered. 
*/ -- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -+ VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx); - - switch (dst_type->e.numeric.type) - { -@@ -7513,9 +7527,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "The 'double' type is not supported for the %s profile.", ctx->profile->name); - break; -- -- default: -- vkd3d_unreachable(); - } - break; - -@@ -7539,9 +7550,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); - break; -- -- default: -- vkd3d_unreachable(); - } - break; - -@@ -7566,7 +7574,6 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ -- default: - hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", - debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); - break; -@@ -7648,7 +7655,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - break; - - case HLSL_OP2_DOT: -- switch (expr->operands[0].node->data_type->dimx) -+ switch (expr->operands[0].node->data_type->e.numeric.dimx) - { - case 3: - generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); -@@ -7746,7 +7753,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, - register_index = reg.id; - } - else -- writemask = (1u << deref->var->data_type->dimx) - 1; -+ writemask = (1u << deref->var->data_type->e.numeric.dimx) - 1; - - if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") - || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) -@@ -7804,7 +7811,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, - if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, - deref->var->semantic.index, false, &type, ®ister_index)) - { -- writemask = (1 << deref->var->data_type->dimx) - 1; -+ writemask = (1 << deref->var->data_type->e.numeric.dimx) - 1; - } - else - { -@@ -7942,7 +7949,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, - dst_param->write_mask = instr->reg.writemask; - - swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); -- swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->dimx); -+ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); - swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); - - src_param = &ins->src[0]; -@@ -8008,7 +8015,7 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - return; - } -- VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -+ VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1); - - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) - return; -@@ -8196,9 +8203,8 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_comb - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; -- default: -- vkd3d_unreachable(); - } -+ break; - - case HLSL_CLASS_SAMPLER: - switch (type->sampler_dim) -@@ -8283,8 +8289,6 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, - const 
struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - unsigned int array_size = hlsl_get_multiarray_size(type); - struct hlsl_struct_field *field; -- unsigned int field_count = 0; -- size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) -@@ -8292,7 +8296,8 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, - - if (array_type->class == HLSL_CLASS_STRUCT) - { -- field_count = array_type->e.record.field_count; -+ unsigned int field_count = array_type->e.record.field_count; -+ size_t fields_offset; - - for (i = 0; i < field_count; ++i) - { -@@ -8309,13 +8314,23 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } -- } - -- type->bytecode_offset = put_u32(buffer, -- vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); -- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -- put_u32(buffer, fields_offset); -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3DXPC_STRUCT, D3DXPT_VOID)); -+ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); -+ } -+ else -+ { -+ type->bytecode_offset = put_u32(buffer, -+ vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); -+ if (hlsl_is_numeric_type(array_type)) -+ put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); -+ else -+ put_u32(buffer, vkd3d_make_u32(1, 1)); -+ put_u32(buffer, vkd3d_make_u32(array_size, 0)); -+ put_u32(buffer, 1); -+ } - } - - static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -@@ -8457,7 +8472,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - { - uint32_t u; - float f; -- } uni; -+ } uni = {0}; - - switch (comp_type->e.numeric.type) - { -@@ -8481,9 +8496,6 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - case HLSL_TYPE_FLOAT: - uni.u = var->default_values[k].number.u; - break; -- -- default: -- vkd3d_unreachable(); - } - - set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); -@@ -8570,7 +8582,7 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - { - if (has_idx) - idx = var->semantic.index; -- write_mask = (1u << var->data_type->dimx) - 1; -+ write_mask = (1u << var->data_type->e.numeric.dimx) - 1; - } - else - { -@@ -8706,7 +8718,7 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - } one = { .f = 1.0 }; - - /* Narrowing casts were already lowered. 
*/ -- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -+ VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx); - - switch (dst_type->e.numeric.type) - { -@@ -8734,9 +8746,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); - return false; -- -- default: -- vkd3d_unreachable(); - } - break; - -@@ -8760,9 +8769,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); - return false; -- -- default: -- vkd3d_unreachable(); - } - break; - -@@ -8786,9 +8792,6 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); - return false; -- -- default: -- vkd3d_unreachable(); - } - break; - -@@ -8798,9 +8801,10 @@ static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ -- default: -- vkd3d_unreachable(); -+ break; - } -+ -+ vkd3d_unreachable(); - } - - static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -8855,7 +8859,7 @@ static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, - value.u[2].f = 1.0f; - value.u[3].f = 1.0f; - vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, -- VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); -+ VKD3D_DATA_FLOAT, instr->data_type->e.numeric.dimx, dst_param->write_mask); - - vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); - } -@@ -9085,7 +9089,7 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, - switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: -- switch (expr->operands[0].node->data_type->dimx) -+ switch (expr->operands[0].node->data_type->e.numeric.dimx) - { - case 4: - generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); -@@ -9380,10 +9384,10 @@ static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_progr - - memset(&value, 0xff, sizeof(value)); - vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, -- VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); -+ VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask); - memset(&value, 0x00, sizeof(value)); - vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, -- VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); -+ VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask); - } - else - { -@@ -9426,7 +9430,7 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) - return false; - -- writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); -+ writemask = vkd3d_write_mask_from_component_count(value->data_type->e.numeric.dimx); - if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, - &ins->dst[0], &store->resource, &instr->loc, writemask)) - return false; -@@ -9458,9 +9462,9 @@ static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_i - - if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) - return false; -- if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) -+ if (offset->node.data_type->e.numeric.dimx > 1 && (offset->value.u[1].i 
< -8 || offset->value.u[1].i > 7)) - return false; -- if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) -+ if (offset->node.data_type->e.numeric.dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) - return false; - return true; - } -@@ -9477,9 +9481,9 @@ static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi( - ins->texel_offset.u = offset->value.u[0].i; - ins->texel_offset.v = 0; - ins->texel_offset.w = 0; -- if (offset->node.data_type->dimx > 1) -+ if (offset->node.data_type->e.numeric.dimx > 1) - ins->texel_offset.v = offset->value.u[1].i; -- if (offset->node.data_type->dimx > 2) -+ if (offset->node.data_type->e.numeric.dimx > 2) - ins->texel_offset.w = offset->value.u[2].i; - } - -@@ -9883,7 +9887,7 @@ static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program - struct hlsl_ir_node *instr = &iff->node; - struct vkd3d_shader_instruction *ins; - -- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); -+ VKD3D_ASSERT(iff->condition.node->data_type->e.numeric.dimx == 1); - - if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) - return; -@@ -10106,7 +10110,7 @@ static const char *string_skip_tag(const char *string) - return string; - } - --void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) - { - unsigned int i; - -@@ -10117,7 +10121,7 @@ void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigne - vkd3d_free(extern_resources); - } - --struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) - { - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; -@@ -10280,7 +10284,8 @@ struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned - ++*count; - } - -- qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -+ if (extern_resources) -+ qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - - return extern_resources; - } -@@ -10454,15 +10459,13 @@ static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_ - - case HLSL_TYPE_INT: - return VKD3D_DATA_INT; -- break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - return VKD3D_DATA_UINT; -- -- default: -- vkd3d_unreachable(); - } -+ -+ vkd3d_unreachable(); - } - - static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, -@@ -10651,6 +10654,510 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - generate_vsir_scan_global_flags(ctx, program, func); - } - -+/* For some reason, for matrices, values from default value initializers end -+ * up in different components than from regular initializers. Default value -+ * initializers fill the matrix in vertical reading order -+ * (left-to-right top-to-bottom) instead of regular reading order -+ * (top-to-bottom left-to-right), so they have to be adjusted. An exception is -+ * that the order of matrix initializers for function parameters are row-major -+ * (top-to-bottom left-to-right). 
*/ -+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) -+{ -+ unsigned int element_comp_count, element, x, y, i; -+ unsigned int base = 0; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_MATRIX: -+ x = index / type->e.numeric.dimy; -+ y = index % type->e.numeric.dimy; -+ return y * type->e.numeric.dimx + x; -+ -+ case HLSL_CLASS_ARRAY: -+ element_comp_count = hlsl_type_component_count(type->e.array.type); -+ element = index / element_comp_count; -+ base = element * element_comp_count; -+ return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ struct hlsl_type *field_type = type->e.record.fields[i].type; -+ -+ element_comp_count = hlsl_type_component_count(field_type); -+ if (index - base < element_comp_count) -+ return base + get_component_index_from_default_initializer_index(field_type, index - base); -+ base += element_comp_count; -+ } -+ break; -+ -+ default: -+ return index; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) -+{ -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SRV_DIMENSION_TEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SRV_DIMENSION_TEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3D_SRV_DIMENSION_TEXTURE3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3D_SRV_DIMENSION_TEXTURECUBE; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return D3D_SRV_DIMENSION_TEXTURE1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return D3D_SRV_DIMENSION_TEXTURE2DARRAY; -+ case HLSL_SAMPLER_DIM_2DMS: -+ return D3D_SRV_DIMENSION_TEXTURE2DMS; -+ case HLSL_SAMPLER_DIM_2DMSARRAY: -+ return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; -+ case HLSL_SAMPLER_DIM_CUBEARRAY: -+ return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; -+ case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ return D3D_SRV_DIMENSION_BUFFER; -+ default: -+ break; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type) -+{ -+ const struct hlsl_type *format = type->e.resource.format; -+ -+ switch (format->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ return D3D_RETURN_TYPE_DOUBLE; -+ -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (format->modifiers & HLSL_MODIFIER_UNORM) -+ return D3D_RETURN_TYPE_UNORM; -+ if (format->modifiers & HLSL_MODIFIER_SNORM) -+ return D3D_RETURN_TYPE_SNORM; -+ return D3D_RETURN_TYPE_FLOAT; -+ -+ case HLSL_TYPE_INT: -+ return D3D_RETURN_TYPE_SINT; -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ return D3D_RETURN_TYPE_UINT; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) -+{ -+ switch (type->class) -+ { -+ case HLSL_CLASS_SAMPLER: -+ return D3D_SIT_SAMPLER; -+ case HLSL_CLASS_TEXTURE: -+ return D3D_SIT_TEXTURE; -+ case HLSL_CLASS_UAV: -+ return D3D_SIT_UAV_RWTYPED; -+ default: -+ break; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -+{ -+ switch (type->class) -+ { -+ case HLSL_CLASS_MATRIX: -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -+ return D3D_SVC_MATRIX_COLUMNS; -+ else -+ return D3D_SVC_MATRIX_ROWS; -+ case HLSL_CLASS_SCALAR: -+ return D3D_SVC_SCALAR; -+ case 
HLSL_CLASS_VECTOR: -+ return D3D_SVC_VECTOR; -+ -+ case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: -+ case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: -+ case HLSL_CLASS_NULL: -+ break; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -+{ -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_BOOL: -+ return D3D_SVT_BOOL; -+ case HLSL_TYPE_DOUBLE: -+ return D3D_SVT_DOUBLE; -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ return D3D_SVT_FLOAT; -+ case HLSL_TYPE_INT: -+ return D3D_SVT_INT; -+ case HLSL_TYPE_UINT: -+ return D3D_SVT_UINT; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) -+{ -+ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); -+ const char *name = array_type->name ? array_type->name : ""; -+ const struct hlsl_profile_info *profile = ctx->profile; -+ unsigned int array_size = 0; -+ size_t name_offset = 0; -+ size_t i; -+ -+ if (type->bytecode_offset) -+ return; -+ -+ if (profile->major_version >= 5) -+ name_offset = put_string(buffer, name); -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ array_size = hlsl_get_multiarray_size(type); -+ -+ if (array_type->class == HLSL_CLASS_STRUCT) -+ { -+ unsigned int field_count = 0; -+ size_t fields_offset = 0; -+ -+ for (i = 0; i < array_type->e.record.field_count; ++i) -+ { -+ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -+ -+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ -+ field->name_bytecode_offset = put_string(buffer, field->name); -+ write_sm4_type(ctx, buffer, field->type); -+ ++field_count; -+ } -+ -+ fields_offset = bytecode_align(buffer); -+ -+ for (i = 0; i < array_type->e.record.field_count; ++i) -+ { -+ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -+ -+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ -+ put_u32(buffer, field->name_bytecode_offset); -+ put_u32(buffer, field->type->bytecode_offset); -+ put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); -+ } -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); -+ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); -+ } -+ else -+ { -+ VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx)); -+ put_u32(buffer, vkd3d_make_u32(array_size, 0)); -+ put_u32(buffer, 1); -+ } -+ -+ if (profile->major_version >= 5) -+ { -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, 0); /* 
FIXME: unknown */ -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, name_offset); -+ } -+} -+ -+static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) -+{ -+ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); -+ size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -+ unsigned int cbuffer_count = 0, extern_resources_count, i, j; -+ size_t cbuffer_position, resource_position, creator_position; -+ const struct hlsl_profile_info *profile = ctx->profile; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ struct extern_resource *extern_resources; -+ const struct hlsl_buffer *cbuffer; -+ const struct hlsl_ir_var *var; -+ -+ static const uint16_t target_types[] = -+ { -+ 0xffff, /* PIXEL */ -+ 0xfffe, /* VERTEX */ -+ 0x4753, /* GEOMETRY */ -+ 0x4853, /* HULL */ -+ 0x4453, /* DOMAIN */ -+ 0x4353, /* COMPUTE */ -+ }; -+ -+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -+ -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (cbuffer->reg.allocated) -+ ++cbuffer_count; -+ } -+ -+ put_u32(&buffer, cbuffer_count); -+ cbuffer_position = put_u32(&buffer, 0); -+ put_u32(&buffer, extern_resources_count); -+ resource_position = put_u32(&buffer, 0); -+ put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), -+ target_types[profile->type])); -+ put_u32(&buffer, 0); /* FIXME: compilation flags */ -+ creator_position = put_u32(&buffer, 0); -+ -+ if (profile->major_version >= 5) -+ { -+ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); -+ put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ -+ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -+ put_u32(&buffer, binding_desc_size); /* size of binding desc */ -+ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ -+ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ -+ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -+ put_u32(&buffer, 0); /* unknown; possibly a null terminator */ -+ } -+ -+ /* Bound resources. */ -+ -+ resources_offset = bytecode_align(&buffer); -+ set_u32(&buffer, resource_position, resources_offset); -+ -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ uint32_t flags = 0; -+ -+ if (resource->is_user_packed) -+ flags |= D3D_SIF_USERPACKED; -+ -+ put_u32(&buffer, 0); /* name */ -+ if (resource->buffer) -+ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -+ else -+ put_u32(&buffer, sm4_resource_type(resource->component_type)); -+ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) -+ { -+ unsigned int dimx = resource->component_type->e.resource.format->e.numeric.dimx; -+ -+ put_u32(&buffer, sm4_data_type(resource->component_type)); -+ put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); -+ put_u32(&buffer, ~0u); /* FIXME: multisample count */ -+ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -+ } -+ else -+ { -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ } -+ put_u32(&buffer, resource->index); -+ put_u32(&buffer, resource->bind_count); -+ put_u32(&buffer, flags); -+ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ put_u32(&buffer, resource->space); -+ put_u32(&buffer, resource->id); -+ } -+ } -+ -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ -+ string_offset = put_string(&buffer, resource->name); -+ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); -+ } -+ -+ /* Buffers. */ -+ -+ cbuffers_offset = bytecode_align(&buffer); -+ set_u32(&buffer, cbuffer_position, cbuffers_offset); -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ unsigned int var_count = 0; -+ -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) -+ ++var_count; -+ } -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, var_count); -+ put_u32(&buffer, 0); /* variable offset */ -+ put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); -+ put_u32(&buffer, 0); /* FIXME: flags */ -+ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER); -+ } -+ -+ i = 0; -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ string_offset = put_string(&buffer, cbuffer->name); -+ set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); -+ } -+ -+ i = 0; -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ size_t vars_start = bytecode_align(&buffer); -+ -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ uint32_t flags = 0; -+ -+ if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ -+ if (var->is_read) -+ flags |= D3D_SVF_USED; -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, var->buffer_offset * sizeof(float)); -+ put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); -+ put_u32(&buffer, flags); -+ put_u32(&buffer, 0); /* type */ -+ put_u32(&buffer, 0); /* default value */ -+ -+ if (profile->major_version >= 5) -+ { -+ put_u32(&buffer, 0); /* texture start */ -+ put_u32(&buffer, 0); /* texture count */ -+ put_u32(&buffer, 0); /* sampler start */ -+ put_u32(&buffer, 0); /* sampler count */ -+ } -+ } -+ -+ j = 0; -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); -+ size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -+ -+ if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ -+ string_offset = put_string(&buffer, var->name); -+ set_u32(&buffer, var_offset, string_offset); -+ write_sm4_type(ctx, &buffer, var->data_type); -+ set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); -+ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset, comp_index; -+ enum hlsl_regset regset; -+ -+ if (comp_type->class == HLSL_CLASS_STRING) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Cannot write string default value."); -+ continue; -+ } -+ -+ comp_index = get_component_index_from_default_initializer_index(var->data_type, k); -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ -+ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), -+ var->default_values[k].number.u); -+ } -+ } -+ } -+ -+ ++j; -+ } -+ } -+ -+ creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); -+ set_u32(&buffer, creator_position, creator_offset); -+ -+ sm4_free_extern_resources(extern_resources, extern_resources_count); -+ -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ rdef->code = buffer.data; -+ rdef->size = buffer.size; -+} -+ - static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index cd7cd2fe6a3..e8dd4d62ae2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -30,7 +30,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -121,7 +121,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -143,20 +143,20 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { - unsigned int k; -- uint32_t u; -- int32_t i; -- double d; -- float f; -+ uint32_t u = 0; -+ double d = 0.0; -+ float f = 0.0f; -+ int32_t i = 0; - -- if (dst_type->dimx != src->node.data_type->dimx -- || dst_type->dimy != src->node.data_type->dimy) -+ if (dst_type->e.numeric.dimx 
!= src->node.data_type->e.numeric.dimx -+ || dst_type->e.numeric.dimy != src->node.data_type->e.numeric.dimy) - { - FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), - debug_hlsl_type(ctx, dst_type)); - return false; - } - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (src->node.data_type->e.numeric.type) - { -@@ -195,9 +195,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - f = !!src->value.u[k].u; - d = !!src->value.u[k].u; - break; -- -- default: -- vkd3d_unreachable(); - } - - switch (dst_type->e.numeric.type) -@@ -222,9 +219,6 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - case HLSL_TYPE_BOOL: - dst->u[k].u = u ? ~0u : 0u; - break; -- -- default: -- vkd3d_unreachable(); - } - } - return true; -@@ -238,7 +232,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -264,7 +258,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -290,7 +284,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -317,7 +311,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -343,7 +337,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -388,7 +382,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -422,7 +416,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -446,7 +440,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -491,7 +485,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -531,7 +525,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < 
dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -557,7 +551,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - - VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -603,7 +597,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -640,7 +634,7 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -667,7 +661,7 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -694,7 +688,7 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -719,10 +713,10 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); -- VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx); - - dst->u[0].f = 0.0f; -- for (k = 0; k < src1->node.data_type->dimx; ++k) -+ for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -748,11 +742,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); -- VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); -- VKD3D_ASSERT(src3->node.data_type->dimx == 1); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.dimx == src2->node.data_type->e.numeric.dimx); -+ VKD3D_ASSERT(src3->node.data_type->e.numeric.dimx == 1); - - dst->u[0].f = src3->value.u[0].f; -- for (k = 0; k < src1->node.data_type->dimx; ++k) -+ for (k = 0; k < src1->node.data_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -779,7 +773,7 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -846,7 +840,7 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); 
- VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (src1->node.data_type->e.numeric.type) - { -@@ -864,9 +858,6 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; - break; -- -- default: -- vkd3d_unreachable(); - } - - dst->u[k].u *= ~0u; -@@ -882,7 +873,7 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (src1->node.data_type->e.numeric.type) - { -@@ -903,9 +894,6 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; - break; -- -- default: -- vkd3d_unreachable(); - } - - dst->u[k].u *= ~0u; -@@ -921,7 +909,7 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (src1->node.data_type->e.numeric.type) - { -@@ -942,9 +930,6 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; - break; -- -- default: -- vkd3d_unreachable(); - } - - dst->u[k].u *= ~0u; -@@ -960,16 +945,13 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - unsigned int shift = src2->value.u[k].u % 32; - - switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_INT: -- dst->u[k].i = src1->value.u[k].i << shift; -- break; -- - case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u << shift; - break; -@@ -991,7 +973,7 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -1029,7 +1011,7 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -1068,7 +1050,7 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -1110,7 +1092,7 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct 
hlsl_constant_value *dst, cons - VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (type) - { -@@ -1144,7 +1126,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - switch (src1->node.data_type->e.numeric.type) - { -@@ -1162,9 +1144,6 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; - break; -- -- default: -- vkd3d_unreachable(); - } - - dst->u[k].u *= ~0u; -@@ -1181,7 +1160,7 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); - VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; - - return true; -@@ -1195,7 +1174,7 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); - VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - -- for (k = 0; k < dst_type->dimx; ++k) -+ for (k = 0; k < dst_type->e.numeric.dimx; ++k) - { - unsigned int shift = src2->value.u[k].u % 32; - -@@ -1403,7 +1382,7 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) - struct hlsl_type *data_type = const_arg->node.data_type; - unsigned int k; - -- for (k = 0; k < data_type->dimx; ++k) -+ for (k = 0; k < data_type->e.numeric.dimx; ++k) - { - switch (data_type->e.numeric.type) - { -@@ -1424,9 +1403,6 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) - if (const_arg->value.u[k].u != 0) - return false; - break; -- -- default: -- return false; - } - } - return true; -@@ -1437,7 +1413,7 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) - struct hlsl_type *data_type = const_arg->node.data_type; - unsigned int k; - -- for (k = 0; k < data_type->dimx; ++k) -+ for (k = 0; k < data_type->e.numeric.dimx; ++k) - { - switch (data_type->e.numeric.type) - { -@@ -1462,9 +1438,6 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) - if (const_arg->value.u[k].u != ~0) - return false; - break; -- -- default: -- return false; - } - } - return true; -@@ -1589,9 +1562,71 @@ static bool is_op_commutative(enum hlsl_ir_expr_op op) - } - } - -+/* Returns true iff x OPL (y OPR z) = (x OPL y) OPR (x OPL z). 
*/ -+static bool is_op_left_distributive(enum hlsl_ir_expr_op opl, enum hlsl_ir_expr_op opr, enum hlsl_base_type type) -+{ -+ switch (opl) -+ { -+ case HLSL_OP2_BIT_AND: -+ return opr == HLSL_OP2_BIT_OR || opr == HLSL_OP2_BIT_XOR; -+ -+ case HLSL_OP2_BIT_OR: -+ return opr == HLSL_OP2_BIT_AND; -+ -+ case HLSL_OP2_DOT: -+ case HLSL_OP2_MUL: -+ return opr == HLSL_OP2_ADD && (type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT); -+ -+ case HLSL_OP2_MAX: -+ return opr == HLSL_OP2_MIN; -+ -+ case HLSL_OP2_MIN: -+ return opr == HLSL_OP2_MAX; -+ -+ default: -+ return false; -+ } -+} -+ -+/* Attempt to collect together the expression (x OPL a) OPR (x OPL b) -> x OPL (a OPR b). */ -+static struct hlsl_ir_node *collect_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+ enum hlsl_ir_expr_op opr, struct hlsl_ir_node *node1, struct hlsl_ir_node *node2) -+{ -+ enum hlsl_base_type type = instr->data_type->e.numeric.type; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *ab, *res; -+ struct hlsl_ir_expr *e1, *e2; -+ enum hlsl_ir_expr_op opl; -+ -+ if (!node1 || !node2 || node1->type != HLSL_IR_EXPR || node2->type != HLSL_IR_EXPR) -+ return NULL; -+ e1 = hlsl_ir_expr(node1); -+ e2 = hlsl_ir_expr(node2); -+ opl = e1->op; -+ -+ if (e2->op != opl || !is_op_left_distributive(opl, opr, type)) -+ return NULL; -+ if (e1->operands[0].node != e2->operands[0].node) -+ return NULL; -+ if (e1->operands[1].node->type != HLSL_IR_CONSTANT || e2->operands[1].node->type != HLSL_IR_CONSTANT) -+ return NULL; -+ -+ if (!(ab = hlsl_new_binary_expr(ctx, opr, e1->operands[1].node, e2->operands[1].node))) -+ return NULL; -+ list_add_before(&instr->entry, &ab->entry); -+ -+ operands[0] = e1->operands[0].node; -+ operands[1] = ab; -+ -+ if (!(res = hlsl_new_expr(ctx, opl, operands, instr->data_type, &instr->loc))) -+ return NULL; -+ list_add_before(&instr->entry, &res->entry); -+ return res; -+} -+ - bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_node *arg1 , *arg2; -+ struct hlsl_ir_node *arg1, *arg2, *tmp; - struct hlsl_ir_expr *expr; - enum hlsl_base_type type; - enum hlsl_ir_expr_op op; -@@ -1612,11 +1647,17 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - if (!arg1 || !arg2) - return false; - -+ if ((tmp = collect_exprs(ctx, instr, op, arg1, arg2))) -+ { -+ /* (x OPL a) OPR (x OPL b) -> x OPL (a OPR b) */ -+ hlsl_replace_node(instr, tmp); -+ return true; -+ } -+ - if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) - { - /* a OP x -> x OP a */ -- struct hlsl_ir_node *tmp = arg1; -- -+ tmp = arg1; - arg1 = arg2; - arg2 = tmp; - progress = true; -@@ -1673,6 +1714,39 @@ bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - progress = true; - } - -+ if (!progress && e1 && (tmp = collect_exprs(ctx, instr, op, e1->operands[1].node, arg2))) -+ { -+ /* (y OPR (x OPL a)) OPR (x OPL b) -> y OPR (x OPL (a OPR b)) */ -+ arg1 = e1->operands[0].node; -+ arg2 = tmp; -+ progress = true; -+ } -+ -+ if (!progress && is_op_commutative(op) && e1 -+ && (tmp = collect_exprs(ctx, instr, op, e1->operands[0].node, arg2))) -+ { -+ /* ((x OPL a) OPR y) OPR (x OPL b) -> (x OPL (a OPR b)) OPR y */ -+ arg1 = tmp; -+ arg2 = e1->operands[1].node; -+ progress = true; -+ } -+ -+ if (!progress && e2 && (tmp = collect_exprs(ctx, instr, op, arg1, e2->operands[0].node))) -+ { -+ /* (x OPL a) OPR ((x OPL b) OPR y) -> (x OPL (a OPR b)) OPR y */ -+ arg1 
= tmp; -+ arg2 = e2->operands[1].node; -+ progress = true; -+ } -+ -+ if (!progress && is_op_commutative(op) && e2 -+ && (tmp = collect_exprs(ctx, instr, op, arg1, e2->operands[1].node))) -+ { -+ /* (x OPL a) OPR (y OPR (x OPL b)) -> (x OPL (a OPR b)) OPR y */ -+ arg1 = tmp; -+ arg2 = e2->operands[0].node; -+ progress = true; -+ } - } - - if (progress) -@@ -1704,7 +1778,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - return false; - src = hlsl_ir_constant(swizzle->val.node); - -- for (i = 0; i < swizzle->node.data_type->dimx; ++i) -+ for (i = 0; i < swizzle->node.data_type->e.numeric.dimx; ++i) - value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)]; - - if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index c2e4b5a4947..3678ad0bacf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -2031,7 +2031,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - element_count = s->element_count; - if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) - return false; -- memcpy(elements, s->elements, element_count * sizeof(*elements)); -+ if (element_count) -+ memcpy(elements, s->elements, element_count * sizeof(*elements)); - - for (i = 0; i < element_count; ++i) - elements[i].sort_index = i; -@@ -3836,7 +3837,8 @@ static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_ - sizeof(*list->structures))) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); -+ if (size) -+ memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); - - list->count += size; - -@@ -4773,7 +4775,8 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ - } - } - -- qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); -+ if (cfg->loop_intervals) -+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); - - if (TRACE_ON()) - for (i = 0; i < cfg->loop_interval_count; ++i) -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index cfbadab8933..efa76983546 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -7259,12 +7259,13 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler - - static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) - { -+ size_t table_count = compiler->offset_info.descriptor_table_count; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t void_id; - - /* If a patch constant function used descriptor indexing the offsets must be reloaded. 
*/ -- memset(compiler->descriptor_offset_ids, 0, compiler->offset_info.descriptor_table_count -- * sizeof(*compiler->descriptor_offset_ids)); -+ if (table_count) -+ memset(compiler->descriptor_offset_ids, 0, table_count * sizeof(*compiler->descriptor_offset_ids)); - vkd3d_spirv_builder_begin_main_function(builder); - vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index bdc1c738a32..aa666086710 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -21,9 +21,7 @@ - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - --#include "hlsl.h" - #include "vkd3d_shader_private.h" --#include "d3dcommon.h" - - #define SM4_MAX_SRC_COUNT 6 - #define SM4_MAX_DST_COUNT 2 -@@ -163,9 +161,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - - #define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) - --/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ --#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 -- - enum vkd3d_sm4_opcode - { - VKD3D_SM4_OP_ADD = 0x00, -@@ -3268,505 +3263,6 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si - vkd3d_free(sorted_elements); - } - --static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) --{ -- switch (type->class) -- { -- case HLSL_CLASS_MATRIX: -- VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -- return D3D_SVC_MATRIX_COLUMNS; -- else -- return D3D_SVC_MATRIX_ROWS; -- case HLSL_CLASS_SCALAR: -- return D3D_SVC_SCALAR; -- case HLSL_CLASS_VECTOR: -- return D3D_SVC_VECTOR; -- -- case HLSL_CLASS_ARRAY: -- case HLSL_CLASS_DEPTH_STENCIL_STATE: -- case HLSL_CLASS_DEPTH_STENCIL_VIEW: -- case HLSL_CLASS_EFFECT_GROUP: -- case HLSL_CLASS_ERROR: -- case HLSL_CLASS_STRUCT: -- case HLSL_CLASS_PASS: -- case HLSL_CLASS_PIXEL_SHADER: -- case HLSL_CLASS_RASTERIZER_STATE: -- case HLSL_CLASS_RENDER_TARGET_VIEW: -- case HLSL_CLASS_SAMPLER: -- case HLSL_CLASS_STRING: -- case HLSL_CLASS_TECHNIQUE: -- case HLSL_CLASS_TEXTURE: -- case HLSL_CLASS_UAV: -- case HLSL_CLASS_VERTEX_SHADER: -- case HLSL_CLASS_VOID: -- case HLSL_CLASS_CONSTANT_BUFFER: -- case HLSL_CLASS_COMPUTE_SHADER: -- case HLSL_CLASS_DOMAIN_SHADER: -- case HLSL_CLASS_HULL_SHADER: -- case HLSL_CLASS_GEOMETRY_SHADER: -- case HLSL_CLASS_BLEND_STATE: -- case HLSL_CLASS_STREAM_OUTPUT: -- case HLSL_CLASS_NULL: -- break; -- } -- vkd3d_unreachable(); --} -- --static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) --{ -- switch (type->e.numeric.type) -- { -- case HLSL_TYPE_BOOL: -- return D3D_SVT_BOOL; -- case HLSL_TYPE_DOUBLE: -- return D3D_SVT_DOUBLE; -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return D3D_SVT_FLOAT; -- case HLSL_TYPE_INT: -- return D3D_SVT_INT; -- case HLSL_TYPE_UINT: -- return D3D_SVT_UINT; -- default: -- vkd3d_unreachable(); -- } --} -- --static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) --{ -- const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); -- const char *name = array_type->name ? 
array_type->name : ""; -- const struct hlsl_profile_info *profile = ctx->profile; -- unsigned int array_size = 0; -- size_t name_offset = 0; -- size_t i; -- -- if (type->bytecode_offset) -- return; -- -- if (profile->major_version >= 5) -- name_offset = put_string(buffer, name); -- -- if (type->class == HLSL_CLASS_ARRAY) -- array_size = hlsl_get_multiarray_size(type); -- -- if (array_type->class == HLSL_CLASS_STRUCT) -- { -- unsigned int field_count = 0; -- size_t fields_offset = 0; -- -- for (i = 0; i < array_type->e.record.field_count; ++i) -- { -- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -- -- if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -- continue; -- -- field->name_bytecode_offset = put_string(buffer, field->name); -- write_sm4_type(ctx, buffer, field->type); -- ++field_count; -- } -- -- fields_offset = bytecode_align(buffer); -- -- for (i = 0; i < array_type->e.record.field_count; ++i) -- { -- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -- -- if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -- continue; -- -- put_u32(buffer, field->name_bytecode_offset); -- put_u32(buffer, field->type->bytecode_offset); -- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); -- } -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); -- put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); -- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -- put_u32(buffer, fields_offset); -- } -- else -- { -- VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); -- put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); -- put_u32(buffer, vkd3d_make_u32(array_size, 0)); -- put_u32(buffer, 1); -- } -- -- if (profile->major_version >= 5) -- { -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, name_offset); -- } --} -- --static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) --{ -- switch (type->class) -- { -- case HLSL_CLASS_SAMPLER: -- return D3D_SIT_SAMPLER; -- case HLSL_CLASS_TEXTURE: -- return D3D_SIT_TEXTURE; -- case HLSL_CLASS_UAV: -- return D3D_SIT_UAV_RWTYPED; -- default: -- break; -- } -- -- vkd3d_unreachable(); --} -- --static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) --{ -- const struct hlsl_type *format = type->e.resource.format; -- -- switch (format->e.numeric.type) -- { -- case HLSL_TYPE_DOUBLE: -- return VKD3D_SM4_DATA_DOUBLE; -- -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- if (format->modifiers & HLSL_MODIFIER_UNORM) -- return VKD3D_SM4_DATA_UNORM; -- if (format->modifiers & HLSL_MODIFIER_SNORM) -- return VKD3D_SM4_DATA_SNORM; -- return VKD3D_SM4_DATA_FLOAT; -- -- case HLSL_TYPE_INT: -- return VKD3D_SM4_DATA_INT; -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- return VKD3D_SM4_DATA_UINT; -- -- default: -- vkd3d_unreachable(); -- } --} -- --static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) --{ -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SRV_DIMENSION_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SRV_DIMENSION_TEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SRV_DIMENSION_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return 
D3D_SRV_DIMENSION_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_1DARRAY: -- return D3D_SRV_DIMENSION_TEXTURE1DARRAY; -- case HLSL_SAMPLER_DIM_2DARRAY: -- return D3D_SRV_DIMENSION_TEXTURE2DARRAY; -- case HLSL_SAMPLER_DIM_2DMS: -- return D3D_SRV_DIMENSION_TEXTURE2DMS; -- case HLSL_SAMPLER_DIM_2DMSARRAY: -- return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; -- case HLSL_SAMPLER_DIM_CUBEARRAY: -- return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; -- case HLSL_SAMPLER_DIM_BUFFER: -- case HLSL_SAMPLER_DIM_RAW_BUFFER: -- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -- return D3D_SRV_DIMENSION_BUFFER; -- default: -- vkd3d_unreachable(); -- } --} -- --/* For some reason, for matrices, values from default value initializers end up in different -- * components than from regular initializers. Default value initializers fill the matrix in -- * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -- * (top-to-bottom left-to-right), so they have to be adjusted. -- * An exception is that the order of matrix initializers for function parameters are row-major -- * (top-to-bottom left-to-right). */ --static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) --{ -- unsigned int element_comp_count, element, x, y, i; -- unsigned int base = 0; -- -- switch (type->class) -- { -- case HLSL_CLASS_MATRIX: -- x = index / type->dimy; -- y = index % type->dimy; -- return y * type->dimx + x; -- -- case HLSL_CLASS_ARRAY: -- element_comp_count = hlsl_type_component_count(type->e.array.type); -- element = index / element_comp_count; -- base = element * element_comp_count; -- return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); -- -- case HLSL_CLASS_STRUCT: -- for (i = 0; i < type->e.record.field_count; ++i) -- { -- struct hlsl_type *field_type = type->e.record.fields[i].type; -- -- element_comp_count = hlsl_type_component_count(field_type); -- if (index - base < element_comp_count) -- return base + get_component_index_from_default_initializer_index(field_type, index - base); -- base += element_comp_count; -- } -- break; -- -- default: -- return index; -- } -- vkd3d_unreachable(); --} -- --void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) --{ -- uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); -- size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -- unsigned int cbuffer_count = 0, extern_resources_count, i, j; -- size_t cbuffer_position, resource_position, creator_position; -- const struct hlsl_profile_info *profile = ctx->profile; -- struct vkd3d_bytecode_buffer buffer = {0}; -- struct extern_resource *extern_resources; -- const struct hlsl_buffer *cbuffer; -- const struct hlsl_ir_var *var; -- -- static const uint16_t target_types[] = -- { -- 0xffff, /* PIXEL */ -- 0xfffe, /* VERTEX */ -- 0x4753, /* GEOMETRY */ -- 0x4853, /* HULL */ -- 0x4453, /* DOMAIN */ -- 0x4353, /* COMPUTE */ -- }; -- -- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (cbuffer->reg.allocated) -- ++cbuffer_count; -- } -- -- put_u32(&buffer, cbuffer_count); -- cbuffer_position = put_u32(&buffer, 0); -- put_u32(&buffer, extern_resources_count); -- resource_position = put_u32(&buffer, 0); -- put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), -- target_types[profile->type])); -- put_u32(&buffer, 0); /* FIXME: compilation flags */ -- creator_position = put_u32(&buffer, 0); -- -- if (profile->major_version >= 5) -- { -- put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); -- put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ -- put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, binding_desc_size); /* size of binding desc */ -- put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ -- put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ -- put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -- put_u32(&buffer, 0); /* unknown; possibly a null terminator */ -- } -- -- /* Bound resources. */ -- -- resources_offset = bytecode_align(&buffer); -- set_u32(&buffer, resource_position, resources_offset); -- -- for (i = 0; i < extern_resources_count; ++i) -- { -- const struct extern_resource *resource = &extern_resources[i]; -- uint32_t flags = 0; -- -- if (resource->is_user_packed) -- flags |= D3D_SIF_USERPACKED; -- -- put_u32(&buffer, 0); /* name */ -- if (resource->buffer) -- put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -- else -- put_u32(&buffer, sm4_resource_type(resource->component_type)); -- if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) -- { -- unsigned int dimx = resource->component_type->e.resource.format->dimx; -- -- put_u32(&buffer, sm4_data_type(resource->component_type)); -- put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); -- put_u32(&buffer, ~0u); /* FIXME: multisample count */ -- flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -- } -- else -- { -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- } -- put_u32(&buffer, resource->index); -- put_u32(&buffer, resource->bind_count); -- put_u32(&buffer, flags); -- -- if (hlsl_version_ge(ctx, 5, 1)) -- { -- put_u32(&buffer, resource->space); -- put_u32(&buffer, resource->id); -- } -- } -- -- for (i = 0; i < extern_resources_count; ++i) -- { -- const struct extern_resource *resource = &extern_resources[i]; -- -- string_offset = put_string(&buffer, resource->name); -- set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); -- } -- -- /* Buffers. 
*/ -- -- cbuffers_offset = bytecode_align(&buffer); -- set_u32(&buffer, cbuffer_position, cbuffers_offset); -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- unsigned int var_count = 0; -- -- if (!cbuffer->reg.allocated) -- continue; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) -- ++var_count; -- } -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, var_count); -- put_u32(&buffer, 0); /* variable offset */ -- put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); -- put_u32(&buffer, 0); /* FIXME: flags */ -- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER); -- } -- -- i = 0; -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!cbuffer->reg.allocated) -- continue; -- -- string_offset = put_string(&buffer, cbuffer->name); -- set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); -- } -- -- i = 0; -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- size_t vars_start = bytecode_align(&buffer); -- -- if (!cbuffer->reg.allocated) -- continue; -- -- set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) -- { -- uint32_t flags = 0; -- -- if (var->is_read) -- flags |= D3D_SVF_USED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, var->buffer_offset * sizeof(float)); -- put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); -- put_u32(&buffer, flags); -- put_u32(&buffer, 0); /* type */ -- put_u32(&buffer, 0); /* default value */ -- -- if (profile->major_version >= 5) -- { -- put_u32(&buffer, 0); /* texture start */ -- put_u32(&buffer, 0); /* texture count */ -- put_u32(&buffer, 0); /* sampler start */ -- put_u32(&buffer, 0); /* sampler count */ -- } -- } -- } -- -- j = 0; -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) -- { -- const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); -- size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -- -- string_offset = put_string(&buffer, var->name); -- set_u32(&buffer, var_offset, string_offset); -- write_sm4_type(ctx, &buffer, var->data_type); -- set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -- -- if (var->default_values) -- { -- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -- unsigned int comp_count = hlsl_type_component_count(var->data_type); -- unsigned int default_value_offset; -- unsigned int k; -- -- default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); -- set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); -- -- for (k = 0; k < comp_count; ++k) -- { -- struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -- unsigned int comp_offset, comp_index; -- enum hlsl_regset regset; -- -- if (comp_type->class == HLSL_CLASS_STRING) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Cannot write string default value."); -- continue; -- } -- -- comp_index = get_component_index_from_default_initializer_index(var->data_type, k); -- comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); -- if (regset == HLSL_REGSET_NUMERIC) -- { -- if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -- hlsl_fixme(ctx, &var->loc, "Write double default values."); -- -- set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), -- var->default_values[k].number.u); -- } -- } -- } -- ++j; -- } -- } -- } -- -- creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); -- set_u32(&buffer, creator_position, creator_offset); -- -- sm4_free_extern_resources(extern_resources, extern_resources_count); -- -- if (buffer.status) -- { -- vkd3d_free(buffer.data); -- ctx->result = buffer.status; -- return; -- } -- rdef->code = buffer.data; -- rdef->size = buffer.size; --} -- - static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type) - { - switch (resource_type) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 86ec8f15fb7..c7ad407f6fb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -379,7 +379,8 @@ size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) - return aligned_size; - } - -- memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); -+ if (aligned_size > buffer->size) -+ memset(&buffer->data[buffer->size], 0xab, aligned_size - buffer->size); - buffer->size = aligned_size; - return aligned_size; - } -@@ -396,7 +397,8 @@ size_t bytecode_put_bytes_unaligned(struct vkd3d_bytecode_buffer *buffer, const - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return offset; - } -- memcpy(buffer->data + offset, bytes, size); -+ if (size) -+ memcpy(&buffer->data[offset], bytes, size); - buffer->size = offset + size; - return offset; - } -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 32f34479ea1..bd3c3758ecb 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -578,8 +578,9 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i - goto done; - } - -- qsort(info->ranges, info->range_count, sizeof(*info->ranges), -- d3d12_root_signature_info_range_compare); -+ if (info->ranges) -+ qsort(info->ranges, info->range_count, 
sizeof(*info->ranges), -+ d3d12_root_signature_info_range_compare); - - for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i) - { --- -2.45.2 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch deleted file mode 100644 index a01c7d86..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-c7e173a1ffa1ba6916dd549bf9f32225440.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 9281cca9b739538f3519f5ab602de53323e4f5a7 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 14 Jan 2025 06:41:42 +1100 -Subject: [PATCH] Updated vkd3d to c7e173a1ffa1ba6916dd549bf9f32225440d1ec6. - ---- - libs/vkd3d/libs/vkd3d/device.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 7b491805a72..9aa4adb6c06 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -3443,6 +3443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - for (i = 0; i < data->NumFeatureLevels; ++i) - { - D3D_FEATURE_LEVEL fl = data->pFeatureLevelsRequested[i]; -+ TRACE("Requested feature level %#x.\n", fl); - if (data->MaxSupportedFeatureLevel < fl && fl <= vulkan_info->max_feature_level) - data->MaxSupportedFeatureLevel = fl; - } --- -2.45.2 -