diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch deleted file mode 100644 index 51e6c899..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch +++ /dev/null @@ -1,29283 +0,0 @@ -From abcbb54af650bd1699f695bdbbffcbffe6ef84fe Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com> -Date: Tue, 3 Sep 2024 07:18:49 +1000 -Subject: [PATCH] Updated vkd3d to e383834049825dde8feb0a230c39d499e580cdf1. - ---- - libs/vkd3d/Makefile.in | 1 + - libs/vkd3d/include/private/vkd3d_common.h | 4 +- - libs/vkd3d/include/vkd3d.h | 1 + - libs/vkd3d/include/vkd3d_shader.h | 219 +- - libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-shader/checksum.c | 49 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 107 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1262 ++--- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 21 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 167 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 2016 ++++++-- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 2369 +++++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 430 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 181 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 10 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1348 ++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 3775 +++++++++++++-- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 20 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 4159 ++++++++++++----- - libs/vkd3d/libs/vkd3d-shader/msl.c | 898 ++++ - libs/vkd3d/libs/vkd3d-shader/preproc.h | 3 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 56 +- - libs/vkd3d/libs/vkd3d-shader/preproc.y | 13 - - libs/vkd3d/libs/vkd3d-shader/spirv.c | 530 ++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 2647 +++++------ - .../libs/vkd3d-shader/vkd3d_shader_main.c | 269 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 134 +- - libs/vkd3d/libs/vkd3d/command.c | 123 +- - 
libs/vkd3d/libs/vkd3d/device.c | 175 +- - libs/vkd3d/libs/vkd3d/resource.c | 14 +- - libs/vkd3d/libs/vkd3d/state.c | 410 +- - libs/vkd3d/libs/vkd3d/utils.c | 3 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 1 + - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 27 +- - 34 files changed, 15965 insertions(+), 5478 deletions(-) - create mode 100644 libs/vkd3d/libs/vkd3d-shader/msl.c - -diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 94e4833dc9a..b073790d986 100644 ---- a/libs/vkd3d/Makefile.in -+++ b/libs/vkd3d/Makefile.in -@@ -25,6 +25,7 @@ SOURCES = \ - libs/vkd3d-shader/hlsl_codegen.c \ - libs/vkd3d-shader/hlsl_constant_ops.c \ - libs/vkd3d-shader/ir.c \ -+ libs/vkd3d-shader/msl.c \ - libs/vkd3d-shader/preproc.l \ - libs/vkd3d-shader/preproc.y \ - libs/vkd3d-shader/spirv.c \ -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 39145a97df1..fd62730f948 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -62,6 +62,8 @@ - #define VKD3D_STRINGIFY(x) #x - #define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) - -+#define vkd3d_clamp(value, lower, upper) max(min(value, upper), lower) -+ - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') - #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -@@ -273,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index 398ae2442d6..b18fd14f4c3 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -98,6 +98,7 @@ enum vkd3d_api_version - VKD3D_API_VERSION_1_11, - VKD3D_API_VERSION_1_12, - VKD3D_API_VERSION_1_13, -+ VKD3D_API_VERSION_1_14, - - 
VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), - }; -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d9a355d3bc9..cb561d7f079 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -56,6 +56,7 @@ enum vkd3d_shader_api_version - VKD3D_SHADER_API_VERSION_1_11, - VKD3D_SHADER_API_VERSION_1_12, - VKD3D_SHADER_API_VERSION_1_13, -+ VKD3D_SHADER_API_VERSION_1_14, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), - }; -@@ -111,6 +112,11 @@ enum vkd3d_shader_structure_type - * \since 1.13 - */ - VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, -+ /** -+ * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. -+ * \since 1.15 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -190,6 +196,17 @@ enum vkd3d_shader_compile_option_backward_compatibility - * - DEPTH to SV_Depth for pixel shader outputs. - */ - VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES = 0x00000001, -+ /** -+ * Causes 'double' to behave as an alias for 'float'. This option only -+ * applies to HLSL sources with shader model 1-3 target profiles. Without -+ * this option using the 'double' type produces compilation errors in -+ * these target profiles. -+ * -+ * This option is disabled by default. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS = 0x00000002, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY), - }; -@@ -469,8 +486,8 @@ enum vkd3d_shader_parameter_type - /** The parameter value is embedded directly in the shader. */ - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, - /** -- * The parameter value is provided to the shader via a specialization -- * constant. This value is only supported for the SPIR-V target type. -+ * The parameter value is provided to the shader via specialization -+ * constants. 
This value is only supported for the SPIR-V target type. - */ - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, - /** -@@ -495,6 +512,13 @@ enum vkd3d_shader_parameter_data_type - VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, - /** The parameter is provided as a 32-bit float. \since 1.13 */ - VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, -+ /** -+ * The parameter is provided as a 4-dimensional vector of 32-bit floats. -+ * This parameter must be used with struct vkd3d_shader_parameter1; -+ * it cannot be used with struct vkd3d_shader_parameter. -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), - }; -@@ -578,6 +602,143 @@ enum vkd3d_shader_parameter_name - * \since 1.13 - */ - VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, -+ /** -+ * A mask of enabled clip planes. -+ * -+ * When this parameter is provided to a vertex shader, for each nonzero bit -+ * of this mask, a user clip distance will be generated from vertex position -+ * in clip space, and the clip plane defined by the indexed vector, taken -+ * from the VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_# parameter. -+ * -+ * Regardless of the specific clip planes which are enabled, the clip -+ * distances which are output are a contiguous array starting from clip -+ * distance 0. This affects the interface of OpenGL. For example, if only -+ * clip planes 1 and 3 are enabled (and so the value of the mask is 0xa), -+ * the user should enable only GL_CLIP_DISTANCE0 and GL_CLIP_DISTANCE1. -+ * -+ * The default value is zero, i.e. do not enable any clip planes. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * If the source shader writes clip distances and this parameter is nonzero, -+ * compilation fails. 
-+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK, -+ /** -+ * Clip plane values. -+ * See VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK for documentation of -+ * clip planes. -+ * -+ * These enum values are contiguous and arithmetic may safely be performed -+ * on them. That is, VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_[n] is -+ * VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 plus n. -+ * -+ * The data type for each parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. -+ * -+ * The default value for each plane is a (0, 0, 0, 0) vector. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_1, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_2, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_3, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_4, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_5, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_6, -+ VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_7, -+ /** -+ * Point size. -+ * -+ * When this parameter is provided to a vertex, tessellation, or geometry -+ * shader, and the source shader does not write point size, it specifies a -+ * uniform value which will be written to point size. -+ * If the source shader writes point size, this parameter is ignored. -+ * -+ * This parameter can be used to implement fixed function point size, as -+ * present in Direct3D versions 8 and 9, if the target environment does not -+ * support point size as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, -+ /** -+ * Minimum point size. -+ * -+ * When this parameter is provided to a vertex, tessellation, or geometry -+ * shader, and the source shader writes point size or uses the -+ * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE parameter, the point size will -+ * be clamped to the provided minimum value. 
-+ * If point size is not written in one of these ways, -+ * this parameter is ignored. -+ * If this parameter is not provided, the point size will not be clamped -+ * to a minimum size by vkd3d-shader. -+ * -+ * This parameter can be used to implement fixed function point size, as -+ * present in Direct3D versions 8 and 9, if the target environment does not -+ * support point size as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, -+ /** -+ * Maximum point size. -+ * -+ * This parameter has identical behaviour to -+ * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, except that it provides -+ * the maximum size rather than the minimum. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, -+ /** -+ * Whether texture coordinate inputs should take their values from the -+ * point coordinate. -+ * -+ * When this parameter is provided to a pixel shader, and the value is -+ * nonzero, any fragment shader input with the semantic name "TEXCOORD" -+ * takes its value from the point coordinates instead of from the previous -+ * shader. The point coordinates here are defined as a four-component vector -+ * whose X and Y components are the X and Y coordinates of the fragment -+ * within a point being rasterized, and whose Z and W components are zero. -+ * -+ * In GLSL, the X and Y components are drawn from gl_PointCoord; in SPIR-V, -+ * they are drawn from a variable with the BuiltinPointCoord decoration. -+ * -+ * This includes t# fragment shader inputs in shader model 2 shaders, -+ * as well as texture sampling in shader model 1 shaders. 
-+ * -+ * This parameter can be used to implement fixed function point sprite, as -+ * present in Direct3D versions 8 and 9, if the target environment does not -+ * support point sprite as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * The default value is zero, i.e. use the original varyings. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; -@@ -625,6 +786,13 @@ struct vkd3d_shader_parameter_immediate_constant1 - * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. - */ - float f32; -+ /** -+ * A pointer to the value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. -+ * -+ * \since 1.14 -+ */ -+ float f32_vec4[4]; - void *_pointer_pad; - uint32_t _pad[4]; - } u; -@@ -636,7 +804,13 @@ struct vkd3d_shader_parameter_immediate_constant1 - */ - struct vkd3d_shader_parameter_specialization_constant - { -- /** The ID of the specialization constant. */ -+ /** -+ * The ID of the specialization constant. -+ * If the type comprises more than one constant, such as -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4, then a contiguous -+ * array of specialization constants should be used, one for each component, -+ * and this ID should point to the first component. -+ */ - uint32_t id; - }; - -@@ -1046,6 +1220,11 @@ enum vkd3d_shader_source_type - * the format used for Direct3D shader model 6 shaders. \since 1.9 - */ - VKD3D_SHADER_SOURCE_DXBC_DXIL, -+ /** -+ * Binary format used by Direct3D 9/10.x/11 effects. -+ * Input is a raw FX section without container. 
\since 1.14 -+ */ -+ VKD3D_SHADER_SOURCE_FX, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), - }; -@@ -1087,6 +1266,10 @@ enum vkd3d_shader_target_type - * Output is a raw FX section without container. \since 1.11 - */ - VKD3D_SHADER_TARGET_FX, -+ /** -+ * A 'Metal Shading Language' shader. \since 1.14 -+ */ -+ VKD3D_SHADER_TARGET_MSL, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE), - }; -@@ -1292,7 +1475,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, - * vkd3d_shader_preprocess_info. - * - * \param code Contents of the included file, which were allocated by the -- * \ref pfn_open_include callback. The user must free them. -+ * vkd3d_shader_preprocess_info.pfn_open_include callback. -+ * The user must free them. - * - * \param context The user-defined pointer passed to struct - * vkd3d_shader_preprocess_info. -@@ -1319,8 +1503,8 @@ struct vkd3d_shader_preprocess_info - - /** - * Pointer to an array of predefined macros. Each macro in this array will -- * be expanded as if a corresponding #define statement were prepended to the -- * source code. -+ * be expanded as if a corresponding \#define statement were prepended to -+ * the source code. - * - * If the same macro is specified multiple times, only the last value is - * used. -@@ -1861,6 +2045,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info - unsigned int combined_sampler_count; - }; - -+/** -+ * A chained structure describing the tessellation information in a hull shader. -+ * -+ * This structure extends vkd3d_shader_compile_info. -+ * -+ * \since 1.15 -+ */ -+struct vkd3d_shader_scan_hull_shader_tessellation_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** The tessellation output primitive. 
*/ -+ enum vkd3d_shader_tessellator_output_primitive output_primitive; -+ /** The tessellation partitioning mode. */ -+ enum vkd3d_shader_tessellator_partitioning partitioning; -+}; -+ - /** - * Data type of a shader varying, returned as part of struct - * vkd3d_shader_signature_element. -@@ -2333,6 +2537,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_D3D_BYTECODE - * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_DXBC_TPF - * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_FX -+ * - VKD3D_SHADER_SOURCE_FX to VKD3D_SHADER_TARGET_D3D_ASM - * - * Supported transformations can also be detected at runtime with the functions - * vkd3d_shader_get_supported_source_types() and -@@ -2798,7 +3003,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ - * \param input_signature The input signature of the second shader. - * - * \param count On output, contains the number of entries written into -- * \ref varyings. -+ * "varyings". - * - * \param varyings Pointer to an output array of varyings. - * This must point to space for N varyings, where N is the number of elements -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index f60ef7db769..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -20,6 +20,7 @@ - #define WIDL_C_INLINE_WRAPPERS - #endif - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include "vkd3d_blob.h" -diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c -index d9560628c77..45de1c92513 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/checksum.c -+++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c -@@ -33,6 +33,11 @@ - * will fill a supplied 16-byte array with the digest. 
- */ - -+/* -+ * DXBC uses a variation of the MD5 algorithm, which only changes the way -+ * the message is padded in the final step. -+ */ -+ - #include "vkd3d_shader_private.h" - - #define DXBC_CHECKSUM_BLOCK_SIZE 64 -@@ -230,10 +235,9 @@ static void md5_update(struct md5_ctx *ctx, const unsigned char *buf, unsigned i - memcpy(ctx->in, buf, len); - } - --static void dxbc_checksum_final(struct md5_ctx *ctx) -+static void md5_final(struct md5_ctx *ctx, enum vkd3d_md5_variant variant) - { - unsigned int padding; -- unsigned int length; - unsigned int count; - unsigned char *p; - -@@ -260,7 +264,7 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) - /* Now fill the next block */ - memset(ctx->in, 0, DXBC_CHECKSUM_BLOCK_SIZE); - } -- else -+ else if (variant == VKD3D_MD5_DXBC) - { - /* Make place for bitcount at the beginning of the block */ - memmove(&ctx->in[4], ctx->in, count); -@@ -268,33 +272,44 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) - /* Pad block to 60 bytes */ - memset(p + 4, 0, padding - 4); - } -+ else -+ { -+ /* Pad block to 56 bytes */ -+ memset(p, 0, padding - 8); -+ } - - /* Append length in bits and transform */ -- length = ctx->i[0]; -- memcpy(&ctx->in[0], &length, sizeof(length)); -- byte_reverse(&ctx->in[4], 14); -- length = ctx->i[0] >> 2 | 0x1; -- memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); -+ if (variant == VKD3D_MD5_DXBC) -+ { -+ unsigned int length; -+ -+ length = ctx->i[0]; -+ memcpy(&ctx->in[0], &length, sizeof(length)); -+ byte_reverse(&ctx->in[4], 14); -+ length = ctx->i[0] >> 2 | 0x1; -+ memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); -+ } -+ else -+ { -+ byte_reverse(ctx->in, 14); -+ -+ ((unsigned int *)ctx->in)[14] = ctx->i[0]; -+ ((unsigned int *)ctx->in)[15] = ctx->i[1]; -+ } - - md5_transform(ctx->buf, (unsigned int *)ctx->in); - byte_reverse((unsigned char *)ctx->buf, 4); - memcpy(ctx->digest, ctx->buf, 16); - } - --#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 -- 
--void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) -+void vkd3d_compute_md5(const void *data, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant) - { -- const uint8_t *ptr = dxbc; -+ const uint8_t *ptr = data; - struct md5_ctx ctx; - -- VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); -- ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; -- size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; -- - md5_init(&ctx); - md5_update(&ctx, ptr, size); -- dxbc_checksum_final(&ctx); -+ md5_final(&ctx, variant); - - memcpy(checksum, ctx.digest, sizeof(ctx.digest)); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 77e9711300f..7c5444f63a3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -79,7 +79,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_DCL_INDEXABLE_TEMP ] = "dcl_indexableTemp", - [VKD3DSIH_DCL_INPUT ] = "dcl_input", - [VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT ] = "dcl_input_control_point_count", -- [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputPrimitive", -+ [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputprimitive", - [VKD3DSIH_DCL_INPUT_PS ] = "dcl_input_ps", - [VKD3DSIH_DCL_INPUT_PS_SGV ] = "dcl_input_ps_sgv", - [VKD3DSIH_DCL_INPUT_PS_SIV ] = "dcl_input_ps_siv", -@@ -89,7 +89,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_DCL_OUTPUT ] = "dcl_output", - [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT ] = "dcl_output_control_point_count", - [VKD3DSIH_DCL_OUTPUT_SIV ] = "dcl_output_siv", -- [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputTopology", -+ [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputtopology", - [VKD3DSIH_DCL_RESOURCE_RAW ] = "dcl_resource_raw", - [VKD3DSIH_DCL_RESOURCE_STRUCTURED ] = "dcl_resource_structured", - [VKD3DSIH_DCL_SAMPLER ] = "dcl_sampler", -@@ -104,7 +104,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_DCL_UAV_RAW ] = "dcl_uav_raw", - [VKD3DSIH_DCL_UAV_STRUCTURED ] = 
"dcl_uav_structured", - [VKD3DSIH_DCL_UAV_TYPED ] = "dcl_uav_typed", -- [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxOutputVertexCount", -+ [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxout", - [VKD3DSIH_DDIV ] = "ddiv", - [VKD3DSIH_DEF ] = "def", - [VKD3DSIH_DEFAULT ] = "default", -@@ -393,14 +393,13 @@ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type regi - } - } - --static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_global_flags global_flags) -+static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) - { - unsigned int i; - - static const struct - { -- enum vkd3d_shader_global_flags flag; -+ enum vsir_global_flags flag; - const char *name; - } - global_flag_info[] = -@@ -675,9 +674,6 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum - { - [VKD3D_DATA_FLOAT ] = "float", - [VKD3D_DATA_INT ] = "int", -- [VKD3D_DATA_RESOURCE ] = "resource", -- [VKD3D_DATA_SAMPLER ] = "sampler", -- [VKD3D_DATA_UAV ] = "uav", - [VKD3D_DATA_UINT ] = "uint", - [VKD3D_DATA_UNORM ] = "unorm", - [VKD3D_DATA_SNORM ] = "snorm", -@@ -1193,6 +1189,14 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); - break; - -+ case VKD3DSPR_PARAMETER: -+ vkd3d_string_buffer_printf(buffer, "parameter"); -+ break; -+ -+ case VKD3DSPR_POINT_COORD: -+ vkd3d_string_buffer_printf(buffer, "vPointCoord"); -+ break; -+ - default: - vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s", - compiler->colours.error, reg->type, compiler->colours.reset); -@@ -1229,8 +1233,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - case VKD3D_DATA_INT: - shader_print_int_literal(compiler, "", reg->u.immconst_u32[0], ""); - break; -- case VKD3D_DATA_RESOURCE: -- case VKD3D_DATA_SAMPLER: - case VKD3D_DATA_UINT: - 
shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); - break; -@@ -1266,8 +1268,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[2], ""); - shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[3], ""); - break; -- case VKD3D_DATA_RESOURCE: -- case VKD3D_DATA_SAMPLER: - case VKD3D_DATA_UINT: - shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); - shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[1], ""); -@@ -1319,6 +1319,23 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - vkd3d_string_buffer_printf(buffer, ")"); - } -+ else if (compiler->flags & VSIR_ASM_FLAG_DUMP_ALL_INDICES) -+ { -+ unsigned int i = 0; -+ -+ if (reg->idx_count == 0 || reg->idx[0].rel_addr) -+ { -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, "%u%s", offset, compiler->colours.reset); -+ i = 1; -+ } -+ -+ for (; i < reg->idx_count; ++i) -+ shader_print_subscript(compiler, reg->idx[i].offset, reg->idx[i].rel_addr); -+ } - else if (reg->type != VKD3DSPR_RASTOUT - && reg->type != VKD3DSPR_MISCTYPE - && reg->type != VKD3DSPR_NULL -@@ -2258,7 +2275,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic - } - } - --static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, -+static enum vkd3d_result dump_dxbc_signature(struct vkd3d_d3d_asm_compiler *compiler, - const char *name, const char *register_name, const struct shader_signature *signature) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -@@ -2325,21 +2342,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, - return VKD3D_OK; - } - --static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, -+static enum vkd3d_result dump_dxbc_signatures(struct 
vkd3d_d3d_asm_compiler *compiler, - const struct vsir_program *program) - { - enum vkd3d_result ret; - -- if ((ret = dump_signature(compiler, ".input", -+ if ((ret = dump_dxbc_signature(compiler, ".input", - program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", - &program->input_signature)) < 0) - return ret; - -- if ((ret = dump_signature(compiler, ".output", "o", -+ if ((ret = dump_dxbc_signature(compiler, ".output", "o", - &program->output_signature)) < 0) - return ret; - -- if ((ret = dump_signature(compiler, ".patch_constant", -+ if ((ret = dump_dxbc_signature(compiler, ".patch_constant", - program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", - &program->patch_constant_signature)) < 0) - return ret; -@@ -2427,7 +2444,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - * doesn't even have an explicit concept of signature. */ - if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) - { -- if ((result = dump_signatures(&compiler, program)) < 0) -+ if ((result = dump_dxbc_signatures(&compiler, program)) < 0) - { - vkd3d_string_buffer_cleanup(buffer); - return result; -@@ -2489,12 +2506,58 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - return result; - } - --void vkd3d_shader_trace(const struct vsir_program *program) -+/* This is meant exclusively for development use. Therefore, differently from -+ * dump_dxbc_signature(), it doesn't try particularly hard to make the output -+ * nice or easily parsable, and it dumps all fields, not just the DXBC ones. -+ * This format isn't meant to be stable. */ -+static void trace_signature(const struct shader_signature *signature, const char *signature_type) - { -- const char *p, *q, *end; -+ struct vkd3d_string_buffer buffer; -+ unsigned int i; -+ -+ TRACE("%s signature:%s\n", signature_type, signature->element_count == 0 ? 
" empty" : ""); -+ -+ vkd3d_string_buffer_init(&buffer); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ const struct signature_element *element = &signature->elements[i]; -+ -+ vkd3d_string_buffer_clear(&buffer); -+ -+ vkd3d_string_buffer_printf(&buffer, "Element %u: %s %u-%u %s", i, -+ get_component_type_name(element->component_type), -+ element->register_index, element->register_index + element->register_count, -+ element->semantic_name); -+ if (element->semantic_index != -1) -+ vkd3d_string_buffer_printf(&buffer, "%u", element->semantic_index); -+ vkd3d_string_buffer_printf(&buffer, -+ " mask %#x used_mask %#x sysval %s min_precision %s interpolation %u stream %u", -+ element->mask, element->used_mask, get_sysval_semantic_name(element->sysval_semantic), -+ get_minimum_precision_name(element->min_precision), element->interpolation_mode, -+ element->stream_index); -+ if (element->target_location != -1) -+ vkd3d_string_buffer_printf(&buffer, " target %u", element->target_location); -+ else -+ vkd3d_string_buffer_printf(&buffer, " unused"); -+ -+ TRACE("%s\n", buffer.buffer); -+ } -+ -+ vkd3d_string_buffer_cleanup(&buffer); -+} -+ -+void vsir_program_trace(const struct vsir_program *program) -+{ -+ const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; - struct vkd3d_shader_code code; -+ const char *p, *q, *end; -+ -+ trace_signature(&program->input_signature, "Input"); -+ trace_signature(&program->output_signature, "Output"); -+ trace_signature(&program->patch_constant_signature, "Patch-constant"); - -- if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) -+ if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) - return; - - end = (const char *)code.code + code.size; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index d05394c3ab7..bda9bc72f56 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ 
-104,6 +104,12 @@ enum vkd3d_sm1_resource_type - VKD3D_SM1_RESOURCE_TEXTURE_3D = 0x4, - }; - -+enum vkd3d_sm1_misc_register -+{ -+ VKD3D_SM1_MISC_POSITION = 0x0, -+ VKD3D_SM1_MISC_FACE = 0x1, -+}; -+ - enum vkd3d_sm1_opcode - { - VKD3D_SM1_OP_NOP = 0x00, -@@ -444,17 +450,36 @@ static uint32_t swizzle_from_sm1(uint32_t swizzle) - shader_sm1_get_swizzle_component(swizzle, 3)); - } - -+/* D3DBC doesn't have the concept of index count. All registers implicitly have -+ * exactly one index. However for some register types the index doesn't make -+ * sense, so we remove it. */ -+static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_type) -+{ -+ switch (reg_type) -+ { -+ case VKD3DSPR_DEPTHOUT: -+ return 0; -+ -+ default: -+ return 1; -+ } -+} -+ - static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, - struct vkd3d_shader_src_param *src) - { - enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) - | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); -+ unsigned int idx_count = idx_count_from_reg_type(reg_type); - -- vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, 1); -+ vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); - src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - src->reg.non_uniform = false; -- src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; -- src->reg.idx[0].rel_addr = rel_addr; -+ if (idx_count == 1) -+ { -+ src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; -+ src->reg.idx[0].rel_addr = rel_addr; -+ } - if (src->reg.type == VKD3DSPR_SAMPLER) - src->reg.dimension = VSIR_DIMENSION_NONE; - else if (src->reg.type == VKD3DSPR_DEPTHOUT) -@@ -470,12 +495,16 @@ static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_p - { - enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> 
VKD3D_SM1_REGISTER_TYPE_SHIFT) - | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); -+ unsigned int idx_count = idx_count_from_reg_type(reg_type); - -- vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, 1); -+ vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); - dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - dst->reg.non_uniform = false; -- dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; -- dst->reg.idx[0].rel_addr = rel_addr; -+ if (idx_count == 1) -+ { -+ dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; -+ dst->reg.idx[0].rel_addr = rel_addr; -+ } - if (dst->reg.type == VKD3DSPR_SAMPLER) - dst->reg.dimension = VSIR_DIMENSION_NONE; - else if (dst->reg.type == VKD3DSPR_DEPTHOUT) -@@ -532,6 +561,21 @@ static struct signature_element *find_signature_element_by_register_index( - return NULL; - } - -+/* Add missing bits to a mask to make it contiguous. */ -+static unsigned int make_mask_contiguous(unsigned int mask) -+{ -+ static const unsigned int table[] = -+ { -+ 0x0, 0x1, 0x2, 0x3, -+ 0x4, 0x7, 0x6, 0x7, -+ 0x8, 0xf, 0xe, 0xf, -+ 0xc, 0xf, 0xe, 0xf, -+ }; -+ -+ VKD3D_ASSERT(mask < ARRAY_SIZE(table)); -+ return table[mask]; -+} -+ - static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, - const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, - unsigned int register_index, bool is_dcl, unsigned int mask) -@@ -547,7 +591,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - - if ((element = find_signature_element(signature, name, index))) - { -- element->mask |= mask; -+ element->mask = make_mask_contiguous(element->mask | mask); - if (!is_dcl) - element->used_mask |= mask; - return true; -@@ -567,7 +611,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - element->register_index = register_index; - element->target_location = register_index; - 
element->register_count = 1; -- element->mask = mask; -+ element->mask = make_mask_contiguous(mask); - element->used_mask = is_dcl ? 0 : mask; - if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - element->interpolation_mode = VKD3DSIM_LINEAR; -@@ -601,7 +645,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * - const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) - { - const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; -- unsigned int register_index = reg->idx[0].offset; -+ unsigned int register_index = reg->idx_count > 0 ? reg->idx[0].offset : 0; - - switch (reg->type) - { -@@ -921,6 +965,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const - shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); - } - shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); -+ -+ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ sm1->p.program->has_point_size = true; - } - - static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, -@@ -1272,7 +1319,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - sm1->end = &code[token_count]; - - /* Estimate instruction count to avoid reallocation in most shaders. */ -- if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) -+ if (!vsir_program_init(program, compile_info, &version, -+ code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -@@ -1338,23 +1386,19 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) - program->flat_constant_count[i] = get_external_constant_count(&sm1, i); - -- if (!sm1.p.failed) -- ret = vkd3d_shader_parser_validate(&sm1.p, config_flags); -- - if (sm1.p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; - - if (ret < 0) - { -- WARN("Failed to parse shader.\n"); - vsir_program_cleanup(program); - return ret; - } - -- return ret; -+ return VKD3D_OK; - } - --bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, - unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) - { - unsigned int i; -@@ -1384,22 +1428,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, -- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, -+ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_FACE}, -+ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, 
VKD3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, - }; - -@@ -1422,33 +1466,33 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, - return false; - } - --bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -- uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) -+bool sm1_usage_from_semantic_name(const char *semantic_name, -+ uint32_t semantic_index, 
enum vkd3d_decl_usage *usage, uint32_t *usage_idx) - { - static const struct - { - const char *name; -- D3DDECLUSAGE usage; -+ enum vkd3d_decl_usage usage; - } - semantics[] = - { -- {"binormal", D3DDECLUSAGE_BINORMAL}, -- {"blendindices", D3DDECLUSAGE_BLENDINDICES}, -- {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, -- {"color", D3DDECLUSAGE_COLOR}, -- {"depth", D3DDECLUSAGE_DEPTH}, -- {"fog", D3DDECLUSAGE_FOG}, -- {"normal", D3DDECLUSAGE_NORMAL}, -- {"position", D3DDECLUSAGE_POSITION}, -- {"positiont", D3DDECLUSAGE_POSITIONT}, -- {"psize", D3DDECLUSAGE_PSIZE}, -- {"sample", D3DDECLUSAGE_SAMPLE}, -- {"sv_depth", D3DDECLUSAGE_DEPTH}, -- {"sv_position", D3DDECLUSAGE_POSITION}, -- {"sv_target", D3DDECLUSAGE_COLOR}, -- {"tangent", D3DDECLUSAGE_TANGENT}, -- {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, -- {"texcoord", D3DDECLUSAGE_TEXCOORD}, -+ {"binormal", VKD3D_DECL_USAGE_BINORMAL}, -+ {"blendindices", VKD3D_DECL_USAGE_BLEND_INDICES}, -+ {"blendweight", VKD3D_DECL_USAGE_BLEND_WEIGHT}, -+ {"color", VKD3D_DECL_USAGE_COLOR}, -+ {"depth", VKD3D_DECL_USAGE_DEPTH}, -+ {"fog", VKD3D_DECL_USAGE_FOG}, -+ {"normal", VKD3D_DECL_USAGE_NORMAL}, -+ {"position", VKD3D_DECL_USAGE_POSITION}, -+ {"positiont", VKD3D_DECL_USAGE_POSITIONT}, -+ {"psize", VKD3D_DECL_USAGE_PSIZE}, -+ {"sample", VKD3D_DECL_USAGE_SAMPLE}, -+ {"sv_depth", VKD3D_DECL_USAGE_DEPTH}, -+ {"sv_position", VKD3D_DECL_USAGE_POSITION}, -+ {"sv_target", VKD3D_DECL_USAGE_COLOR}, -+ {"tangent", VKD3D_DECL_USAGE_TANGENT}, -+ {"tessfactor", VKD3D_DECL_USAGE_TESS_FACTOR}, -+ {"texcoord", VKD3D_DECL_USAGE_TEXCOORD}, - }; - - unsigned int i; -@@ -1468,21 +1512,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - - struct d3dbc_compiler - { -+ const struct vkd3d_sm1_opcode_info *opcode_table; - struct vsir_program *program; - struct vkd3d_bytecode_buffer buffer; - struct vkd3d_shader_message_context *message_context; -- -- /* OBJECTIVE: Store all the required information in the other fields so -- * that this hlsl_ctx is no 
longer necessary. */ -- struct hlsl_ctx *ctx; -+ bool failed; - }; - - static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) - { -- if (type == VKD3D_SHADER_TYPE_VERTEX) -- return D3DVS_VERSION(major, minor); -- else -- return D3DPS_VERSION(major, minor); -+ return vkd3d_make_u32(vkd3d_make_u16(minor, major), -+ type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS); - } - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) -@@ -1512,6 +1552,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1524,6 +1565,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -1617,6 +1659,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1629,6 +1672,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -1709,7 +1753,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) - - void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) - { -- size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; -+ size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, 
creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - -@@ -1741,15 +1785,16 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - -- ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); -+ ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */ - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); -- put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ -+ vars_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ - - vars_start = bytecode_align(buffer); -+ set_u32(buffer, vars_offset, vars_start - ctab_start); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -1825,8 +1870,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff - switch (comp_type->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &var->loc, "Write double default values."); -- uni.u = 0; -+ if (ctx->double_as_float_alias) -+ uni.u = var->default_values[k].number.u; -+ else -+ uni.u = 0; - break; - - case HLSL_TYPE_INT: -@@ -1860,24 +1907,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff - set_u32(buffer, creator_offset, offset - ctab_start); - - ctab_end = bytecode_align(buffer); -- set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -+ set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); - } - - static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) - { -- return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) -- | ((type << 
D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -+ return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) -+ | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); - } - - struct sm1_instruction - { -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; -+ enum vkd3d_sm1_opcode opcode; - unsigned int flags; - - struct sm1_dst_register - { - enum vkd3d_shader_register_type type; -- D3DSHADER_PARAM_DSTMOD_TYPE mod; -+ enum vkd3d_shader_dst_modifier mod; - unsigned int writemask; - uint32_t reg; - } dst; -@@ -1885,7 +1932,7 @@ struct sm1_instruction - struct sm1_src_register - { - enum vkd3d_shader_register_type type; -- D3DSHADER_PARAM_SRCMOD_TYPE mod; -+ enum vkd3d_shader_src_modifier mod; - unsigned int swizzle; - uint32_t reg; - } srcs[4]; -@@ -1900,11 +1947,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) - const struct sm1_dst_register *dst = &instr->dst; - unsigned int i; - -- if (instr->opcode != D3DSIO_MOV) -+ if (instr->opcode != VKD3D_SM1_OP_MOV) - return false; -- if (dst->mod != D3DSPDM_NONE) -+ if (dst->mod != VKD3DSPDM_NONE) - return false; -- if (src->mod != D3DSPSM_NONE) -+ if (src->mod != VKD3DSPSM_NONE) - return false; - if (src->type != dst->type) - return false; -@@ -1923,13 +1970,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) - static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) - { - VKD3D_ASSERT(reg->writemask); -- put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); -+ put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER -+ | sm1_encode_register_type(reg->type) -+ | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) -+ | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); - } - - static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) - { -- put_u32(buffer, (1u << 31) | 
sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); -+ put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER -+ | sm1_encode_register_type(reg->type) -+ | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) -+ | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); - } - - static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) -@@ -1945,7 +1998,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s - token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); - - if (version->major > 1) -- token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; -+ token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; - put_u32(buffer, token); - - if (instr->has_dst) -@@ -1955,346 +2008,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s - write_sm1_src_register(buffer, &instr->srcs[i]); - }; - --static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) --{ -- src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); --} -- --static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) -+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( -+ struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) - { -- struct sm1_instruction instr = -- { -- .opcode = D3DSIO_DP2ADD, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = VKD3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .srcs[2].type = VKD3DSPR_TEMP, -- 
.srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), -- .srcs[2].reg = src3->id, -- .src_count = 3, -- }; -- -- d3dbc_write_instruction(d3dbc, &instr); --} -- --static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) --{ -- struct sm1_instruction instr = -- { -- .opcode = opcode, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = VKD3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .srcs[2].type = VKD3DSPR_TEMP, -- .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), -- .srcs[2].reg = src3->id, -- .src_count = 3, -- }; -- -- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); -- sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &instr); --} -- --static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) --{ -- struct sm1_instruction instr = -- { -- .opcode = opcode, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = VKD3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .src_count = 2, -- }; -- -- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- sm1_map_src_swizzle(&instr.srcs[1], 
instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &instr); --} -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ const struct vkd3d_sm1_opcode_info *info; -+ unsigned int i = 0; - --static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) --{ -- struct sm1_instruction instr = -+ for (;;) - { -- .opcode = opcode, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = VKD3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .src_count = 2, -- }; -+ info = &d3dbc->opcode_table[i++]; -+ if (info->vkd3d_opcode == VKD3DSIH_INVALID) -+ return NULL; - -- d3dbc_write_instruction(d3dbc, &instr); -+ if (vkd3d_opcode == info->vkd3d_opcode -+ && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) -+ && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) -+ || !info->max_version.major)) -+ return info; -+ } - } - --static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -- const struct hlsl_reg *dst, const struct hlsl_reg *src, -- D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -+static uint32_t swizzle_from_vsir(uint32_t swizzle) - { -- struct sm1_instruction instr = -- { -- .opcode = opcode, -+ uint32_t x = vsir_swizzle_get_component(swizzle, 0); -+ uint32_t y = vsir_swizzle_get_component(swizzle, 1); -+ uint32_t z = vsir_swizzle_get_component(swizzle, 2); -+ uint32_t w = vsir_swizzle_get_component(swizzle, 3); - -- .dst.type = VKD3DSPR_TEMP, -- .dst.mod = dst_mod, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, 
-- .has_dst = 1, -+ if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) -+ ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); - -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), -- .srcs[0].reg = src->id, -- .srcs[0].mod = src_mod, -- .src_count = 1, -- }; -- -- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &instr); -+ return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) -+ | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) -+ | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) -+ | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); - } - --static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -+static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_src_param *param, -+ struct sm1_src_register *src, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -- const struct hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- const struct hlsl_type *src_type = arg1->data_type; -- struct hlsl_ctx *ctx = d3dbc->ctx; -+ src->mod = param->modifiers; -+ src->reg = param->reg.idx[0].offset; -+ src->type = param->reg.type; -+ src->swizzle = swizzle_from_vsir(param->swizzle); - -- /* Narrowing casts were already lowered. 
*/ -- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -- -- switch (dst_type->e.numeric.type) -+ if (param->reg.idx[0].rel_addr) - { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- /* Integrals are internally represented as floats, so no change is necessary.*/ -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- switch(src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not -- * reach this case unless we are missing something. */ -- hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); -- break; -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast to double."); -- break; -- -- case HLSL_TYPE_BOOL: -- /* Casts to bool should have already been lowered. 
*/ -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", -- debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); -- break; -+ vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, -+ "Unhandled relative addressing on source register."); -+ d3dbc->failed = true; - } - } - --static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) -+static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param, -+ struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc) - { -- const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -- struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -- struct hlsl_ctx *ctx = d3dbc->ctx; -- unsigned int i, x; -+ dst->mod = param->modifiers; -+ dst->reg = param->reg.idx[0].offset; -+ dst->type = param->reg.type; -+ dst->writemask = param->write_mask; - -- for (i = 0; i < ctx->constant_defs.count; ++i) -+ if (param->reg.idx[0].rel_addr) - { -- const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; -- uint32_t token = D3DSIO_DEF; -- const struct sm1_dst_register reg = -- { -- .type = VKD3DSPR_CONST, -- .writemask = VKD3DSP_WRITEMASK_ALL, -- .reg = constant_reg->index, -- }; -- -- if (version->major > 1) -- token |= 5 << D3DSI_INSTLENGTH_SHIFT; -- put_u32(buffer, token); -- -- write_sm1_dst_register(buffer, ®); -- for (x = 0; x < 4; ++x) -- put_f32(buffer, constant_reg->value.f[x]); -+ vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, -+ "Unhandled relative addressing on destination register."); -+ d3dbc->failed = true; - } - } - --static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, -- const struct signature_element *element, bool output) -+static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) - { - const struct vkd3d_shader_version *version = 
&d3dbc->program->shader_version; - struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -- struct sm1_dst_register reg = {0}; -- uint32_t token, usage_idx; -- D3DDECLUSAGE usage; -- bool ret; -+ uint32_t token; - -- if (hlsl_sm1_register_from_semantic(version, element->semantic_name, -- element->semantic_index, output, ®.type, ®.reg)) -- { -- usage = 0; -- usage_idx = 0; -- } -- else -+ const struct sm1_dst_register reg = - { -- ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); -- VKD3D_ASSERT(ret); -- reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -- reg.reg = element->register_index; -- } -+ .type = VKD3DSPR_CONST, -+ .writemask = VKD3DSP_WRITEMASK_ALL, -+ .reg = ins->dst[0].reg.idx[0].offset, -+ }; - -- token = D3DSIO_DCL; -+ token = VKD3D_SM1_OP_DEF; - if (version->major > 1) -- token |= 2 << D3DSI_INSTLENGTH_SHIFT; -+ token |= 5 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; - put_u32(buffer, token); - -- token = (1u << 31); -- token |= usage << D3DSP_DCL_USAGE_SHIFT; -- token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; -- put_u32(buffer, token); -- -- reg.writemask = element->mask; - write_sm1_dst_register(buffer, ®); -+ for (unsigned int x = 0; x < 4; ++x) -+ put_f32(buffer, ins->src[0].reg.u.immconst_f32[x]); - } - --static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) --{ -- struct vsir_program *program = d3dbc->program; -- const struct vkd3d_shader_version *version; -- bool write_in = false, write_out = false; -- -- version = &program->shader_version; -- if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) -- write_in = true; -- else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) -- write_in = write_out = true; -- else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) -- write_in = true; -- -- if (write_in) -- { -- for (unsigned int i = 0; i < program->input_signature.element_count; ++i) -- d3dbc_write_semantic_dcl(d3dbc, 
&program->input_signature.elements[i], false); -- } -- -- if (write_out) -- { -- for (unsigned int i = 0; i < program->output_signature.element_count; ++i) -- d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); -- } --} -- --static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, -- unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) -+static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, -+ unsigned int reg_id, enum vkd3d_sm1_resource_type res_type) - { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; -- uint32_t token, res_type = 0; -+ uint32_t token; - -- token = D3DSIO_DCL; -+ token = VKD3D_SM1_OP_DCL; - if (version->major > 1) -- token |= 2 << D3DSI_INSTLENGTH_SHIFT; -+ token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; - put_u32(buffer, token); - -- switch (sampler_dim) -- { -- case HLSL_SAMPLER_DIM_2D: -- res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; -- break; -- -- case HLSL_SAMPLER_DIM_CUBE: -- res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; -- break; -- -- case HLSL_SAMPLER_DIM_3D: -- res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; -- break; -- -- default: -- vkd3d_unreachable(); -- break; -- } -- -- token = (1u << 31); -+ token = VKD3D_SM1_INSTRUCTION_PARAMETER; - token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; - put_u32(buffer, token); - -@@ -2305,618 +2124,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, - write_sm1_dst_register(buffer, ®); - } - --static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) -+static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) - { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -- struct hlsl_ctx *ctx = d3dbc->ctx; -- enum hlsl_sampler_dim sampler_dim; -- unsigned int i, count, reg_id; -- struct hlsl_ir_var *var; -+ const struct 
vkd3d_shader_semantic *semantic = &ins->declaration.semantic; -+ unsigned int reg_id; - - if (version->major < 2) - return; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) -- continue; -- -- count = var->bind_count[HLSL_REGSET_SAMPLERS]; -- -- for (i = 0; i < count; ++i) -- { -- if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -- { -- sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; -- if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -- { -- /* These can appear in sm4-style combined sample instructions. */ -- hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); -- continue; -- } -- -- reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; -- d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); -- } -- } -- } --} -+ reg_id = semantic->resource.reg.reg.idx[0].offset; - --static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); -- struct sm1_instruction sm1_instr = -+ if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER) - { -- .opcode = D3DSIO_MOV, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.reg = instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_CONST, -- .srcs[0].reg = constant->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), -- .src_count = 1, -- }; -- -- VKD3D_ASSERT(instr->reg.allocated); -- VKD3D_ASSERT(constant->reg.allocated); -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &sm1_instr); --} -- --static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, -- const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) --{ -- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -- struct hlsl_ir_node *arg1 = expr->operands[0].node; -- unsigned int i; -- 
-- for (i = 0; i < instr->data_type->dimx; ++i) -- { -- struct hlsl_reg src = arg1->reg, dst = instr->reg; -- -- src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); -- dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); -- d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); -- } --} -- --static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, -- const struct hlsl_reg *dst, const struct hlsl_reg *src) --{ -- struct sm1_instruction instr = -- { -- .opcode = D3DSIO_SINCOS, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), -- .srcs[0].reg = src->id, -- .src_count = 1, -- }; -- -- if (op == HLSL_OP1_COS_REDUCED) -- VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); -- else /* HLSL_OP1_SIN_REDUCED */ -- VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); -- -- if (d3dbc->ctx->profile->major_version < 3) -- { -- instr.src_count = 3; -- -- instr.srcs[1].type = VKD3DSPR_CONST; -- instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); -- instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; -- -- instr.srcs[2].type = VKD3DSPR_CONST; -- instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); -- instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; -- } -- -- d3dbc_write_instruction(d3dbc, &instr); --} -- --static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) --{ -- const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -- struct hlsl_ir_node *arg1 = expr->operands[0].node; -- struct hlsl_ir_node *arg2 = expr->operands[1].node; -- struct hlsl_ir_node *arg3 = expr->operands[2].node; -- struct hlsl_ctx *ctx = d3dbc->ctx; -- -- VKD3D_ASSERT(instr->reg.allocated); -- -- if (expr->op == HLSL_OP1_REINTERPRET) -- 
{ -- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE, -+ "dcl instruction with register type %u.", semantic->resource.reg.reg.type); -+ d3dbc->failed = true; - return; - } - -- if (expr->op == HLSL_OP1_CAST) -+ switch (semantic->resource_type) - { -- d3dbc_write_cast(d3dbc, instr); -- return; -- } -- -- if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) -- { -- /* These need to be lowered. */ -- hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); -- return; -- } -- -- switch (expr->op) -- { -- case HLSL_OP1_ABS: -- d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -+ case VKD3D_SHADER_RESOURCE_TEXTURE_2D: -+ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D); - break; - -- case HLSL_OP1_DSX: -- d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); -+ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: -+ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE); - break; - -- case HLSL_OP1_DSY: -- d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); -- break; -- -- case HLSL_OP1_EXP2: -- d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); -- break; -- -- case HLSL_OP1_LOG2: -- d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); -- break; -- -- case HLSL_OP1_NEG: -- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -- break; -- -- case HLSL_OP1_SAT: -- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -- break; -- -- case HLSL_OP1_RCP: -- d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); -- break; -- -- case HLSL_OP1_RSQ: -- d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); -- break; -- -- case HLSL_OP1_COS_REDUCED: -- case HLSL_OP1_SIN_REDUCED: -- d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); -- break; -- 
-- case HLSL_OP2_ADD: -- d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_MAX: -- d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_MIN: -- d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_MUL: -- d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP1_FRACT: -- d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); -- break; -- -- case HLSL_OP2_DOT: -- switch (arg1->data_type->dimx) -- { -- case 4: -- d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case 3: -- d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_OP2_LOGIC_AND: -- d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_LOGIC_OR: -- d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_SLT: -- if (version->type == VKD3D_SHADER_TYPE_PIXEL) -- hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); -- d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP3_CMP: -- if (version->type == VKD3D_SHADER_TYPE_VERTEX) -- hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); -- d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -- break; -- -- case HLSL_OP3_DP2ADD: -- d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -- break; -- -- case HLSL_OP3_MAD: -- d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ case VKD3D_SHADER_RESOURCE_TEXTURE_3D: -+ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, 
VKD3D_SM1_RESOURCE_TEXTURE_3D); - break; - - default: -- hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); -- break; -- } --} -- --static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); -- --static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_if *iff = hlsl_ir_if(instr); -- const struct hlsl_ir_node *condition; -- struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; -- -- condition = iff->condition.node; -- VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -- -- sm1_ifc = (struct sm1_instruction) -- { -- .opcode = D3DSIO_IFC, -- .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), -- .srcs[0].reg = condition->reg.id, -- .srcs[0].mod = 0, -- -- .srcs[1].type = VKD3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), -- .srcs[1].reg = condition->reg.id, -- .srcs[1].mod = D3DSPSM_NEG, -- -- .src_count = 2, -- }; -- d3dbc_write_instruction(d3dbc, &sm1_ifc); -- d3dbc_write_block(d3dbc, &iff->then_block); -- -- if (!list_empty(&iff->else_block.instrs)) -- { -- sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; -- d3dbc_write_instruction(d3dbc, &sm1_else); -- d3dbc_write_block(d3dbc, &iff->else_block); -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE, -+ "dcl instruction with resource_type %u.", semantic->resource_type); -+ d3dbc->failed = true; -+ return; - } -- -- sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; -- d3dbc_write_instruction(d3dbc, &sm1_endif); - } - --static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( -+ 
struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) - { -- const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ const struct vkd3d_sm1_opcode_info *info; - -- switch (jump->type) -+ if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) - { -- case HLSL_IR_JUMP_DISCARD_NEG: -- { -- struct hlsl_reg *reg = &jump->condition.node->reg; -- -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_TEXKILL, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.reg = reg->id, -- .dst.writemask = reg->writemask, -- .has_dst = 1, -- }; -- -- d3dbc_write_instruction(d3dbc, &sm1_instr); -- break; -- } -- -- default: -- hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, -+ "Opcode %#x not supported for shader profile.", ins->opcode); -+ d3dbc->failed = true; -+ return NULL; - } --} -- --static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_load *load = hlsl_ir_load(instr); -- struct hlsl_ctx *ctx = d3dbc->ctx; -- const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_MOV, - -- .dst.type = VKD3DSPR_TEMP, -- .dst.reg = instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].reg = reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), -- .src_count = 1, -- }; -- -- VKD3D_ASSERT(instr->reg.allocated); -- -- if (load->src.var->is_uniform) -+ if (ins->dst_count != info->dst_count) - { -- VKD3D_ASSERT(reg.allocated); -- sm1_instr.srcs[0].type = VKD3DSPR_CONST; -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, -+ "Invalid destination count %u for vsir instruction %#x (expected %u).", -+ ins->dst_count, ins->opcode, 
info->dst_count); -+ d3dbc->failed = true; -+ return NULL; - } -- else if (load->src.var->is_input_semantic) -+ if (ins->src_count != info->src_count) - { -- if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, -- load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) -- { -- VKD3D_ASSERT(reg.allocated); -- sm1_instr.srcs[0].type = VKD3DSPR_INPUT; -- sm1_instr.srcs[0].reg = reg.id; -- } -- else -- sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, -+ "Invalid source count %u for vsir instruction %#x (expected %u).", -+ ins->src_count, ins->opcode, info->src_count); -+ d3dbc->failed = true; -+ return NULL; - } - -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &sm1_instr); -+ return info; - } - --static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -+static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc, -+ const struct vkd3d_shader_instruction *ins) - { -- const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -- struct hlsl_ir_node *coords = load->coords.node; -- struct hlsl_ir_node *ddx = load->ddx.node; -- struct hlsl_ir_node *ddy = load->ddy.node; -- unsigned int sampler_offset, reg_id; -- struct hlsl_ctx *ctx = d3dbc->ctx; -- struct sm1_instruction sm1_instr; -+ struct sm1_instruction instr = {0}; -+ const struct vkd3d_sm1_opcode_info *info; - -- sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); -- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; -+ if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) -+ return; - -- sm1_instr = (struct sm1_instruction) -- { -- .dst.type = VKD3DSPR_TEMP, -- .dst.reg = 
instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -+ instr.opcode = info->sm1_opcode; -+ instr.flags = ins->flags; -+ instr.has_dst = info->dst_count; -+ instr.src_count = info->src_count; - -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].reg = coords->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), -+ if (instr.has_dst) -+ sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location); -+ for (unsigned int i = 0; i < instr.src_count; ++i) -+ sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location); - -- .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, -- .srcs[1].reg = reg_id, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), -+ d3dbc_write_instruction(d3dbc, &instr); -+} - -- .src_count = 2, -- }; -+static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) -+{ -+ uint32_t writemask; - -- switch (load->load_type) -+ switch (ins->opcode) - { -- case HLSL_RESOURCE_SAMPLE: -- sm1_instr.opcode = D3DSIO_TEX; -+ case VKD3DSIH_DEF: -+ d3dbc_write_vsir_def(d3dbc, ins); - break; - -- case HLSL_RESOURCE_SAMPLE_PROJ: -- sm1_instr.opcode = D3DSIO_TEX; -- sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; -+ case VKD3DSIH_DCL: -+ d3dbc_write_vsir_dcl(d3dbc, ins); - break; - -- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -- sm1_instr.opcode = D3DSIO_TEX; -- sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; -+ case VKD3DSIH_ABS: -+ case VKD3DSIH_ADD: -+ case VKD3DSIH_CMP: -+ case VKD3DSIH_DP2ADD: -+ case VKD3DSIH_DP3: -+ case VKD3DSIH_DP4: -+ case VKD3DSIH_DSX: -+ case VKD3DSIH_DSY: -+ case VKD3DSIH_ELSE: -+ case VKD3DSIH_ENDIF: -+ case VKD3DSIH_FRC: -+ case VKD3DSIH_IFC: -+ case VKD3DSIH_MAD: -+ case VKD3DSIH_MAX: -+ case VKD3DSIH_MIN: -+ case VKD3DSIH_MOV: -+ case VKD3DSIH_MUL: -+ case VKD3DSIH_SINCOS: -+ case VKD3DSIH_SLT: -+ case VKD3DSIH_TEX: -+ case VKD3DSIH_TEXKILL: -+ 
case VKD3DSIH_TEXLDD: -+ d3dbc_write_vsir_simple_instruction(d3dbc, ins); - break; - -- case HLSL_RESOURCE_SAMPLE_GRAD: -- sm1_instr.opcode = D3DSIO_TEXLDD; -- -- sm1_instr.srcs[2].type = VKD3DSPR_TEMP; -- sm1_instr.srcs[2].reg = ddx->reg.id; -- sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); -- -- sm1_instr.srcs[3].type = VKD3DSPR_TEMP; -- sm1_instr.srcs[3].reg = ddy->reg.id; -- sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); -- -- sm1_instr.src_count += 2; -+ case VKD3DSIH_EXP: -+ case VKD3DSIH_LOG: -+ case VKD3DSIH_RCP: -+ case VKD3DSIH_RSQ: -+ writemask = ins->dst->write_mask; -+ if (writemask != VKD3DSP_WRITEMASK_0 && writemask != VKD3DSP_WRITEMASK_1 -+ && writemask != VKD3DSP_WRITEMASK_2 && writemask != VKD3DSP_WRITEMASK_3) -+ { -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_WRITEMASK, -+ "writemask %#x for vsir instruction with opcode %#x is not single component.", -+ writemask, ins->opcode); -+ d3dbc->failed = true; -+ } -+ d3dbc_write_vsir_simple_instruction(d3dbc, ins); - break; - - default: -- hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); -- return; -+ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, -+ "vsir instruction with opcode %#x.", ins->opcode); -+ d3dbc->failed = true; -+ break; - } -- -- VKD3D_ASSERT(instr->reg.allocated); -- -- d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -+static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, -+ const struct signature_element *element, bool output) - { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -- const struct hlsl_ir_store *store = hlsl_ir_store(instr); -- struct hlsl_ctx *ctx = d3dbc->ctx; -- const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); -- const struct 
hlsl_ir_node *rhs = store->rhs.node; -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_MOV, -- -- .dst.type = VKD3DSPR_TEMP, -- .dst.reg = reg.id, -- .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), -- .has_dst = 1, -- -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].reg = rhs->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), -- .src_count = 1, -- }; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -+ struct sm1_dst_register reg = {0}; -+ enum vkd3d_decl_usage usage; -+ uint32_t token, usage_idx; -+ bool ret; - -- if (store->lhs.var->is_output_semantic) -+ if (sm1_register_from_semantic_name(version, element->semantic_name, -+ element->semantic_index, output, ®.type, ®.reg)) - { -- if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) -- { -- sm1_instr.dst.type = VKD3DSPR_TEMP; -- sm1_instr.dst.reg = 0; -- } -- else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, -- store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) -- { -- VKD3D_ASSERT(reg.allocated); -- sm1_instr.dst.type = VKD3DSPR_OUTPUT; -- sm1_instr.dst.reg = reg.id; -- } -- else -- sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; -+ usage = 0; -+ usage_idx = 0; - } - else -- VKD3D_ASSERT(reg.allocated); -- -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &sm1_instr); --} -- --static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); -- const struct hlsl_ir_node *val = swizzle->val.node; -- struct sm1_instruction sm1_instr = - { -- .opcode = D3DSIO_MOV, -+ ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); -+ VKD3D_ASSERT(ret); -+ reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ reg.reg = element->register_index; -+ } - -- .dst.type = VKD3DSPR_TEMP, -- .dst.reg = instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -+ token = VKD3D_SM1_OP_DCL; -+ if (version->major > 1) -+ token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; -+ put_u32(buffer, token); - -- .srcs[0].type = VKD3DSPR_TEMP, -- .srcs[0].reg = val->reg.id, -- .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), -- swizzle->swizzle, instr->data_type->dimx), -- .src_count = 1, -- }; -+ token = (1u << 31); -+ token |= usage << VKD3D_SM1_DCL_USAGE_SHIFT; -+ token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; -+ put_u32(buffer, token); - -- VKD3D_ASSERT(instr->reg.allocated); -- VKD3D_ASSERT(val->reg.allocated); -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- d3dbc_write_instruction(d3dbc, &sm1_instr); -+ reg.writemask = element->mask; -+ write_sm1_dst_register(buffer, ®); - } - --static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) -+static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) - { -- struct hlsl_ctx *ctx = d3dbc->ctx; -- const struct hlsl_ir_node *instr; -- -- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -- { -- if (instr->data_type) -- { -- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -- { -- hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -- break; -- } -- } -- -- switch (instr->type) -- { -- case HLSL_IR_CALL: -- vkd3d_unreachable(); -- -- case HLSL_IR_CONSTANT: -- d3dbc_write_constant(d3dbc, instr); -- break; -- -- case HLSL_IR_EXPR: -- d3dbc_write_expr(d3dbc, instr); -- break; -- -- case HLSL_IR_IF: -- if (hlsl_version_ge(ctx, 2, 1)) -- d3dbc_write_if(d3dbc, instr); -- else -- hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); -- break; -- -- 
case HLSL_IR_JUMP: -- d3dbc_write_jump(d3dbc, instr); -- break; -+ struct vsir_program *program = d3dbc->program; -+ const struct vkd3d_shader_version *version; -+ bool write_in = false, write_out = false; - -- case HLSL_IR_LOAD: -- d3dbc_write_load(d3dbc, instr); -- break; -+ version = &program->shader_version; -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) -+ write_in = true; -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) -+ write_in = write_out = true; -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) -+ write_in = true; - -- case HLSL_IR_RESOURCE_LOAD: -- d3dbc_write_resource_load(d3dbc, instr); -- break; -+ if (write_in) -+ { -+ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); -+ } - -- case HLSL_IR_STORE: -- d3dbc_write_store(d3dbc, instr); -- break; -+ if (write_out) -+ { -+ for (unsigned int i = 0; i < program->output_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); -+ } -+} - -- case HLSL_IR_SWIZZLE: -- d3dbc_write_swizzle(d3dbc, instr); -- break; -+static void d3dbc_write_program_instructions(struct d3dbc_compiler *d3dbc) -+{ -+ struct vsir_program *program = d3dbc->program; -+ unsigned int i; - -- default: -- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -- } -- } -+ for (i = 0; i < program->instructions.count; ++i) -+ d3dbc_write_vsir_instruction(d3dbc, &program->instructions.elements[i]); - } - --/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -- * data from the other parameters instead, so it can be removed as an argument -- * and be declared in vkd3d_shader_private.h and used without relying on HLSL -- * IR structs. 
*/ - int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { - const struct vkd3d_shader_version *version = &program->shader_version; - struct d3dbc_compiler d3dbc = {0}; - struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; -+ int result; - -- d3dbc.ctx = ctx; - d3dbc.program = program; - d3dbc.message_context = message_context; -+ switch (version->type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ d3dbc.opcode_table = vs_opcode_table; -+ break; -+ -+ case VKD3D_SHADER_TYPE_PIXEL: -+ d3dbc.opcode_table = ps_opcode_table; -+ break; -+ -+ default: -+ vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE, -+ "Invalid shader type %u.", version->type); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } - - put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - - bytecode_put_bytes(buffer, ctab->code, ctab->size); - -- d3dbc_write_constant_defs(&d3dbc); - d3dbc_write_semantic_dcls(&d3dbc); -- d3dbc_write_sampler_dcls(&d3dbc); -- d3dbc_write_block(&d3dbc, &entry_func->body); -+ d3dbc_write_program_instructions(&d3dbc); - -- put_u32(buffer, D3DSIO_END); -+ put_u32(buffer, VKD3D_SM1_OP_END); - -+ result = VKD3D_OK; - if (buffer->status) -- ctx->result = buffer->status; -+ result = buffer->status; -+ if (d3dbc.failed) -+ result = VKD3D_ERROR_INVALID_SHADER; - -- if (!ctx->result) -+ if (!result) - { - out->code = buffer->data; - out->size = buffer->size; -@@ -2925,5 +2409,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - { - vkd3d_free(buffer->data); - } -- return ctx->result; -+ return result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c 
b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 184788dc57e..f6ac8e0829e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -20,6 +20,19 @@ - - #include "vkd3d_shader_private.h" - -+#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 -+ -+static void compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) -+{ -+ const uint8_t *ptr = dxbc; -+ -+ VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); -+ ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; -+ size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; -+ -+ vkd3d_compute_md5(ptr, size, checksum, VKD3D_MD5_DXBC); -+} -+ - void dxbc_writer_init(struct dxbc_writer *dxbc) - { - memset(dxbc, 0, sizeof(*dxbc)); -@@ -72,7 +85,7 @@ int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_ - } - set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - -- vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); -+ compute_dxbc_checksum(buffer.data, buffer.size, checksum); - for (i = 0; i < 4; ++i) - set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - -@@ -188,7 +201,7 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ - checksum[3] = read_u32(&ptr); - if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) - { -- vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); -+ compute_dxbc_checksum(data, data_size, calculated_checksum); - if (memcmp(checksum, calculated_checksum, sizeof(checksum))) - { - WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " -@@ -406,8 +419,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - const char *name; - uint32_t mask; - -- e[i].sort_index = i; -- - if (has_stream_index) - e[i].stream_index = read_u32(&ptr); - else -@@ -1488,7 +1499,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro - dxbc->code = context.buffer.data; - dxbc->size = total_size; - -- 
vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); -+ compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); - for (i = 0; i < 4; ++i) - set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 4a17c62292b..d467693bd59 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -430,6 +430,7 @@ enum dx_intrinsic_opcode - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, - DX_DERIV_FINEY = 86, -+ DX_SAMPLE_INDEX = 90, - DX_COVERAGE = 91, - DX_THREAD_ID = 93, - DX_GROUP_ID = 94, -@@ -3827,6 +3828,11 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - { - switch (sysval_semantic) - { -+ /* VSIR does not use an I/O register for SV_SampleIndex, but its -+ * signature element has a register index of UINT_MAX and it is -+ * convenient to return a valid register type here to handle it. */ -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ return VKD3DSPR_NULL; - case VKD3D_SHADER_SV_COVERAGE: - return VKD3DSPR_COVERAGE; - case VKD3D_SHADER_SV_DEPTH: -@@ -3844,6 +3850,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) - { - enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; -+ enum vkd3d_shader_register_type io_reg_type; - bool is_patch_constant, is_control_point; - struct vkd3d_shader_dst_param *param; - const struct signature_element *e; -@@ -3876,9 +3883,10 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - - param = ¶ms[i]; - -- if (e->register_index == UINT_MAX) -+ if (e->register_index == UINT_MAX -+ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) - { -- dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic)); -+ 
dst_param_io_init(param, e, io_reg_type); - continue; - } - -@@ -3888,7 +3896,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - if (is_control_point) - { - if (reg_type == VKD3DSPR_OUTPUT) -- param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions); -+ param->reg.idx[count].rel_addr = vsir_program_create_outpointid_param(sm6->p.program); - param->reg.idx[count++].offset = 0; - } - -@@ -4161,8 +4169,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ - dst_param_init(&dst_params[0]); - - dst_params[1].reg = ptr->u.reg; -- /* The groupshared register has data type UAV when accessed. */ -- dst_params[1].reg.data_type = VKD3D_DATA_UAV; -+ dst_params[1].reg.data_type = VKD3D_DATA_UNUSED; - dst_params[1].reg.idx[1].rel_addr = NULL; - dst_params[1].reg.idx[1].offset = ~0u; - dst_params[1].reg.idx_count = 1; -@@ -4175,6 +4182,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - const struct sm6_type *type_b, struct sm6_parser *sm6) - { - bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); -+ bool is_double = sm6_type_is_double(type_a); - bool is_bool = sm6_type_is_bool(type_a); - enum vkd3d_shader_opcode op; - bool is_valid; -@@ -4199,7 +4207,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - case BINOP_ADD: - case BINOP_SUB: - /* NEG is applied later for subtraction. */ -- op = is_int ? VKD3DSIH_IADD : VKD3DSIH_ADD; -+ op = is_int ? VKD3DSIH_IADD : (is_double ? VKD3DSIH_DADD : VKD3DSIH_ADD); - is_valid = !is_bool; - break; - case BINOP_AND: -@@ -4215,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - is_valid = is_int && !is_bool; - break; - case BINOP_MUL: -- op = is_int ? VKD3DSIH_UMUL : VKD3DSIH_MUL; -+ op = is_int ? VKD3DSIH_UMUL : (is_double ? 
VKD3DSIH_DMUL : VKD3DSIH_MUL); - is_valid = !is_bool; - break; - case BINOP_OR: -@@ -4223,7 +4231,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - is_valid = is_int; - break; - case BINOP_SDIV: -- op = is_int ? VKD3DSIH_IDIV : VKD3DSIH_DIV; -+ op = is_int ? VKD3DSIH_IDIV : (is_double ? VKD3DSIH_DDIV : VKD3DSIH_DIV); - is_valid = !is_bool; - break; - case BINOP_SREM: -@@ -4865,8 +4873,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; - src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); -+ /* Differently from other descriptors, constant buffers require an -+ * additional index, used to index within the constant buffer itself. */ -+ src_param->reg.idx_count = 3; - register_index_address_init(&src_param->reg.idx[2], operands[1], sm6); -- VKD3D_ASSERT(src_param->reg.idx_count == 3); - - type = sm6_type_get_scalar_type(dst->type, 0); - VKD3D_ASSERT(type); -@@ -4965,8 +4975,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - dst->u.handle.d = d; - - reg = &dst->u.handle.reg; -- /* Set idx_count to 3 for use with load/store instructions. 
*/ -- vsir_register_init(reg, d->reg_type, d->reg_data_type, 3); -+ vsir_register_init(reg, d->reg_type, d->reg_data_type, 2); - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = id; - register_index_address_init(®->idx[1], operands[2], sm6); -@@ -5794,6 +5803,34 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ - instruction_dst_param_init_ssa_vector(ins, component_count, sm6); - } - -+static void sm6_parser_emit_dx_sample_index(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ const struct shader_signature *signature = &sm6->p.program->input_signature; -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ unsigned int element_idx; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ -+ /* SV_SampleIndex is identified in VSIR by its signature element index, -+ * but the index is not supplied as a parameter to the DXIL intrinsic. 
*/ -+ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_SAMPLE_INDEX, 0, &element_idx)) -+ { -+ WARN("Sample index is not in the signature.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -+ "Sample index signature element for a sample index operation is missing."); -+ return; -+ } -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param->reg = sm6->input_params[element_idx].reg; -+ src_param_init(src_param); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -5871,6 +5908,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - return; - } - e = &signature->elements[row_index]; -+ if (!e->sysval_semantic) -+ column_index += vsir_write_mask_get_component_idx(e->mask); - - if (column_index >= VKD3D_VEC4_SIZE) - { -@@ -6297,6 +6336,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, -+ [DX_SAMPLE_INDEX ] = {"i", "", sm6_parser_emit_dx_sample_index}, - [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, - [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, -@@ -6861,7 +6901,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re - struct vkd3d_shader_dst_param *dst_params; - struct vkd3d_shader_src_param *src_params; - const struct sm6_value *ptr, *cmp, *new; -- const struct sm6_type *type; - unsigned int i = 0; - bool is_volatile; - uint64_t code; -@@ -6887,9 +6926,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct 
dxil_re - return; - } - -- type = ptr->type->u.pointer.type; -- cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); -- new = sm6_parser_get_value_by_ref(sm6, record, type, &i); -+ /* Forward-referenced comparands are stored as value/type pairs, even -+ * though in principle we could use the destination type. */ -+ cmp = sm6_parser_get_value_by_ref(sm6, record, NULL, &i); -+ new = sm6_parser_get_value_by_ref(sm6, record, ptr->type->u.pointer.type, &i); - if (!cmp || !new) - return; - -@@ -7287,7 +7327,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - unsigned int i = 0, alignment, operand_count; - struct vkd3d_shader_src_param *src_params; - struct vkd3d_shader_dst_param *dst_param; -- const struct sm6_type *pointee_type; - const struct sm6_value *ptr, *src; - uint64_t alignment_code; - -@@ -7299,13 +7338,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - return; - } - -- pointee_type = ptr->type->u.pointer.type; -- if (!(src = sm6_parser_get_value_by_ref(sm6, record, pointee_type, &i))) -+ /* Forward-referenced sources are stored as value/type pairs, even -+ * though in principle we could use the destination type. 
*/ -+ if (!(src = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) - return; - if (!sm6_value_validate_is_numeric(src, sm6)) - return; - -- if (pointee_type != src->type) -+ if (ptr->type->u.pointer.type != src->type) - { - WARN("Type mismatch.\n"); - vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -@@ -8510,6 +8550,7 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = - [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, - [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, - [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, -+ [SEMANTIC_KIND_SAMPLEINDEX] = VKD3D_SHADER_SV_SAMPLE_INDEX, - [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, - [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, - [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, -@@ -8908,7 +8949,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, - d->resource_type = ins->resource_type; - d->kind = kind; - d->reg_type = VKD3DSPR_RESOURCE; -- d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; -+ d->reg_data_type = VKD3D_DATA_UNUSED; - d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) - ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - -@@ -8982,7 +9023,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, - d->resource_type = ins->resource_type; - d->kind = values[0]; - d->reg_type = VKD3DSPR_UAV; -- d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; -+ d->reg_data_type = VKD3D_DATA_UNUSED; - d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) - ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - -@@ -9346,7 +9387,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Signature element is not a node.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "Signature element is not a metadata node."); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - element_node = m->u.node; -@@ -9355,7 +9396,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Invalid operand count %u.\n", element_node->operand_count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "Invalid signature element operand count %u.", element_node->operand_count); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - if (element_node->operand_count > 11) - { -@@ -9374,7 +9415,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Failed to load uint value at index %u.\n", j); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "Signature element value at index %u is not an integer.", j); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - } - -@@ -9385,7 +9426,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - FIXME("Unsupported element id %u not equal to its index %u.\n", values[0], i); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "A non-sequential and non-zero-based element id is not supported."); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - if (!sm6_metadata_value_is_string(element_node->operands[1])) -@@ -9393,7 +9434,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Element name is not a string.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "Signature element name is not a metadata string."); -- return 
VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - e->semantic_name = element_node->operands[1]->u.string_value; - -@@ -9407,7 +9448,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Unhandled semantic kind %u.\n", j); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "DXIL semantic kind %u is unhandled.", j); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - if ((e->interpolation_mode = values[5]) >= VKD3DSIM_COUNT) -@@ -9415,7 +9456,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Unhandled interpolation mode %u.\n", e->interpolation_mode); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "Interpolation mode %u is unhandled.", e->interpolation_mode); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - e->register_count = values[6]; -@@ -9430,7 +9471,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Unhandled I/O register semantic kind %u.\n", j); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "DXIL semantic kind %u is unhandled for an I/O register.", j); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - } - else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) -@@ -9439,7 +9480,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "A signature element starting row of %u with count %u is invalid.", - e->register_index, e->register_count); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - index = values[9]; -@@ -9448,7 +9489,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Invalid column start %u with count %u.\n", index, column_count); - vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "A signature element starting column %u with count %u is invalid.", index, column_count); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - e->mask = vkd3d_write_mask_from_component_count(column_count); -@@ -9471,7 +9512,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - WARN("Semantic index list is not a node.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, - "Signature element semantic index list is not a metadata node."); -- return VKD3D_ERROR_INVALID_SHADER; -+ goto invalid; - } - - element_node = m->u.node; -@@ -9516,6 +9557,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - s->element_count = operand_count; - - return VKD3D_OK; -+ -+invalid: -+ vkd3d_free(elements); -+ return VKD3D_ERROR_INVALID_SHADER; - } - - static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -@@ -9557,7 +9602,7 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - - static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) - { -- enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; -+ enum vsir_global_flags global_flags, mask, rotated_flags; - struct vkd3d_shader_instruction *ins; - - if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) -@@ -9567,7 +9612,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm - "Global flags metadata value is not an integer."); - return; - } -- /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. */ -+ /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vsir_global_flags. 
*/ - mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; - rotated_flags = global_flags & mask; - rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); -@@ -9575,6 +9620,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm - - ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); - ins->declaration.global_flags = global_flags; -+ sm6->p.program->global_flags = global_flags; - } - - static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -@@ -9633,6 +9679,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co - ins->declaration.thread_group_size.x = group_sizes[0]; - ins->declaration.thread_group_size.y = group_sizes[1]; - ins->declaration.thread_group_size.z = group_sizes[2]; -+ sm6->p.program->thread_group_size = ins->declaration.thread_group_size; - - return VKD3D_OK; - } -@@ -9670,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, - - ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); - ins->declaration.tessellator_domain = tessellator_domain; -+ sm6->p.program->tess_domain = tessellator_domain; - } - --static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, -- const char *type) -+static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, -+ unsigned int count, bool allow_zero, const char *type) - { -- if (!count || count > 32) -+ if ((!count && !allow_zero) || count > 32) - { - WARN("%s control point count %u invalid.\n", type, count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -@@ -9904,7 +9952,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa - } - - sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); -- sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); -+ 
sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); - sm6->p.program->input_control_point_count = operands[1]; - - return operands[0]; -@@ -9963,9 +10011,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa - } - } - -- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); -+ sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); - program->input_control_point_count = operands[1]; -- sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); -+ sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); - sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); - program->output_control_point_count = operands[2]; - sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); -@@ -10303,12 +10351,28 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - count = max(token_count, 400) - 400; -- if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) -+ if (!vsir_program_init(program, compile_info, &version, -+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) - return VKD3D_ERROR_OUT_OF_MEMORY; - vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); - sm6->ptr = &sm6->start[1]; - sm6->bitpos = 2; - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ break; -+ -+ default: -+ if (program->patch_constant_signature.element_count != 0) -+ { -+ WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); -+ shader_signature_cleanup(&program->patch_constant_signature); -+ } -+ break; -+ } -+ - input_signature = &program->input_signature; - output_signature = &program->output_signature; - patch_constant_signature = &program->patch_constant_signature; -@@ -10526,9 +10590,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - - dxil_block_destroy(&sm6->root_block); - -+ if (sm6->p.failed) -+ { -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ - return VKD3D_OK; - - fail: -+ sm6_parser_cleanup(sm6); - vsir_program_cleanup(program); - return ret; - } -@@ -10570,18 +10641,10 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(byte_code); - -- if (!sm6.p.failed && ret >= 0) -- ret = vkd3d_shader_parser_validate(&sm6.p, config_flags); -- -- if (sm6.p.failed && ret >= 0) -- ret = VKD3D_ERROR_INVALID_SHADER; -- -- sm6_parser_cleanup(&sm6); - if (ret < 0) -- { -- WARN("Failed to parse shader.\n"); - return ret; -- } - -- return ret; -+ sm6_parser_cleanup(&sm6); -+ -+ return VKD3D_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index a1d1fd6572f..cb42551ee8b 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin - return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); - } - -+struct fx_4_binary_type -+{ -+ uint32_t name; -+ uint32_t class; -+ uint32_t element_count; -+ uint32_t unpacked_size; -+ uint32_t stride; -+ uint32_t packed_size; -+ uint32_t typeinfo; -+}; -+ - struct string_entry - { - struct rb_entry entry; -@@ -38,6 +49,7 @@ struct type_entry - struct list entry; - const char *name; - uint32_t elements_count; -+ uint32_t modifiers; - uint32_t offset; - }; - -@@ -181,6 +193,7 @@ struct fx_write_context - - struct vkd3d_bytecode_buffer unstructured; - struct vkd3d_bytecode_buffer structured; -+ struct vkd3d_bytecode_buffer objects; - - struct rb_tree strings; - struct list types; -@@ -223,11 +236,6 @@ static void set_status(struct fx_write_context *fx, int status) - fx->status = status; - } - --static bool has_annotations(const struct hlsl_ir_var *var) --{ -- return var->annotations && !list_empty(&var->annotations->vars); --} -- - static uint32_t write_string(const char *string, struct fx_write_context *fx) - { - return fx->ops->write_string(string, fx); -@@ -278,9 +286,9 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i - - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -+ unsigned int elements_count, modifiers; - const struct hlsl_type *element_type; - struct type_entry *type_entry; -- unsigned int elements_count; - const char *name; - - VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); -@@ -297,6 +305,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - } - - name = get_fx_4_type_name(element_type); -+ modifiers = element_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK; - - LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) - { -@@ -306,6 
+315,9 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - if (type_entry->elements_count != elements_count) - continue; - -+ if (type_entry->modifiers != modifiers) -+ continue; -+ - return type_entry->offset; - } - -@@ -315,6 +327,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - type_entry->offset = write_fx_4_type(type, fx); - type_entry->name = name; - type_entry->elements_count = elements_count; -+ type_entry->modifiers = modifiers; - - list_add_tail(&fx->types, &type_entry->entry); - -@@ -429,17 +442,26 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx - write_fx_4_state_block(var, 0, count_offset, fx); - } - -+static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offset, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count; -+ -+ count = write_annotations(var->annotations, fx); -+ set_u32(buffer, count_offset, count); -+} -+ - static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t name_offset; -+ uint32_t name_offset, annotation_count_offset; - - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); -- put_u32(buffer, 0); /* Annotation count. */ -+ annotation_count_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* Assignment count. 
*/ - -- /* TODO: annotations */ -+ write_fx_2_annotations(var, annotation_count_offset, fx); - /* TODO: assignments */ - - if (var->state_block_count && var->state_blocks[0]->count) -@@ -459,25 +481,93 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) - return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; - } - --static const uint32_t fx_4_numeric_base_type[] = -+enum fx_4_type_constants -+{ -+ /* Numeric types encoding */ -+ FX_4_NUMERIC_TYPE_FLOAT = 1, -+ FX_4_NUMERIC_TYPE_INT = 2, -+ FX_4_NUMERIC_TYPE_UINT = 3, -+ FX_4_NUMERIC_TYPE_BOOL = 4, -+ -+ FX_4_NUMERIC_CLASS_SCALAR = 1, -+ FX_4_NUMERIC_CLASS_VECTOR = 2, -+ FX_4_NUMERIC_CLASS_MATRIX = 3, -+ -+ FX_4_NUMERIC_BASE_TYPE_SHIFT = 3, -+ FX_4_NUMERIC_ROWS_SHIFT = 8, -+ FX_4_NUMERIC_COLUMNS_SHIFT = 11, -+ FX_4_NUMERIC_COLUMN_MAJOR_MASK = 0x4000, -+ -+ /* Object types */ -+ FX_4_OBJECT_TYPE_STRING = 0x1, -+ FX_4_OBJECT_TYPE_BLEND_STATE = 0x2, -+ FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE = 0x3, -+ FX_4_OBJECT_TYPE_RASTERIZER_STATE = 0x4, -+ FX_4_OBJECT_TYPE_PIXEL_SHADER = 0x5, -+ FX_4_OBJECT_TYPE_VERTEX_SHADER = 0x6, -+ FX_4_OBJECT_TYPE_GEOMETRY_SHADER = 0x7, -+ FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO = 0x8, -+ -+ FX_4_OBJECT_TYPE_TEXTURE = 0x9, -+ FX_4_OBJECT_TYPE_TEXTURE_1D = 0xa, -+ FX_4_OBJECT_TYPE_TEXTURE_1DARRAY = 0xb, -+ FX_4_OBJECT_TYPE_TEXTURE_2D = 0xc, -+ FX_4_OBJECT_TYPE_TEXTURE_2DARRAY = 0xd, -+ FX_4_OBJECT_TYPE_TEXTURE_2DMS = 0xe, -+ FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf, -+ FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10, -+ FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11, -+ FX_4_OBJECT_TYPE_RTV = 0x13, -+ FX_4_OBJECT_TYPE_DSV = 0x14, -+ FX_4_OBJECT_TYPE_SAMPLER_STATE = 0x15, -+ FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17, -+ -+ FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b, -+ FX_5_OBJECT_TYPE_COMPUTE_SHADER = 0x1c, -+ FX_5_OBJECT_TYPE_HULL_SHADER = 0x1d, -+ FX_5_OBJECT_TYPE_DOMAIN_SHADER = 0x1e, -+ -+ FX_5_OBJECT_TYPE_UAV_1D = 0x1f, -+ FX_5_OBJECT_TYPE_UAV_1DARRAY = 0x20, -+ 
FX_5_OBJECT_TYPE_UAV_2D = 0x21, -+ FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22, -+ FX_5_OBJECT_TYPE_UAV_3D = 0x23, -+ FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24, -+ FX_5_OBJECT_TYPE_SRV_RAW_BUFFER = 0x25, -+ FX_5_OBJECT_TYPE_UAV_RAW_BUFFER = 0x26, -+ FX_5_OBJECT_TYPE_SRV_STRUCTURED_BUFFER = 0x27, -+ FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28, -+ FX_5_OBJECT_TYPE_SRV_APPEND_STRUCTURED_BUFFER = 0x2b, -+ FX_5_OBJECT_TYPE_SRV_CONSUME_STRUCTURED_BUFFER = 0x2c, -+ -+ /* Types */ -+ FX_4_TYPE_CLASS_NUMERIC = 1, -+ FX_4_TYPE_CLASS_OBJECT = 2, -+ FX_4_TYPE_CLASS_STRUCT = 3, -+ -+ /* Assignment types */ -+ FX_4_ASSIGNMENT_CONSTANT = 0x1, -+ FX_4_ASSIGNMENT_VARIABLE = 0x2, -+ FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, -+ FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, -+}; -+ -+static const uint32_t fx_4_numeric_base_types[] = - { -- [HLSL_TYPE_FLOAT] = 1, -- [HLSL_TYPE_INT ] = 2, -- [HLSL_TYPE_UINT ] = 3, -- [HLSL_TYPE_BOOL ] = 4, -+ [HLSL_TYPE_HALF ] = FX_4_NUMERIC_TYPE_FLOAT, -+ [HLSL_TYPE_FLOAT] = FX_4_NUMERIC_TYPE_FLOAT, -+ [HLSL_TYPE_INT ] = FX_4_NUMERIC_TYPE_INT, -+ [HLSL_TYPE_UINT ] = FX_4_NUMERIC_TYPE_UINT, -+ [HLSL_TYPE_BOOL ] = FX_4_NUMERIC_TYPE_BOOL, - }; - - static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) - { -- static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; -- static const unsigned int NUMERIC_ROWS_SHIFT = 8; -- static const unsigned int NUMERIC_COLUMNS_SHIFT = 11; -- static const unsigned int NUMERIC_COLUMN_MAJOR_MASK = 0x4000; - static const uint32_t numeric_type_class[] = - { -- [HLSL_CLASS_SCALAR] = 1, -- [HLSL_CLASS_VECTOR] = 2, -- [HLSL_CLASS_MATRIX] = 3, -+ [HLSL_CLASS_SCALAR] = FX_4_NUMERIC_CLASS_SCALAR, -+ [HLSL_CLASS_VECTOR] = FX_4_NUMERIC_CLASS_VECTOR, -+ [HLSL_CLASS_MATRIX] = FX_4_NUMERIC_CLASS_MATRIX, - }; - struct hlsl_ctx *ctx = fx->ctx; - uint32_t value = 0; -@@ -497,20 +587,21 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - switch (type->e.numeric.type) - { 
- case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -- value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); -+ value |= (fx_4_numeric_base_types[type->e.numeric.type] << FX_4_NUMERIC_BASE_TYPE_SHIFT); - break; - default: - hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); - return 0; - } - -- value |= (type->dimy & 0x7) << NUMERIC_ROWS_SHIFT; -- value |= (type->dimx & 0x7) << NUMERIC_COLUMNS_SHIFT; -+ value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; -+ value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -- value |= NUMERIC_COLUMN_MAJOR_MASK; -+ value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK; - - return value; - } -@@ -539,6 +630,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - [HLSL_SAMPLER_DIM_3D] = "RWTexture3D", - [HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer", - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", -+ [HLSL_SAMPLER_DIM_RAW_BUFFER] = "RWByteAddressBuffer", - }; - - switch (type->class) -@@ -564,17 +656,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - case HLSL_CLASS_VERTEX_SHADER: - return "VertexShader"; - -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ return "GeometryShader"; -+ - case HLSL_CLASS_PIXEL_SHADER: - return "PixelShader"; - - case HLSL_CLASS_STRING: - return "String"; - -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ if (type->e.numeric.type == HLSL_TYPE_HALF) -+ return "float"; -+ /* fall-through */ - default: - return type->name; - } - } - -+static bool is_numeric_fx_4_type(const struct hlsl_type *type) -+{ -+ type = hlsl_get_multiarray_element_type(type); -+ return type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type); -+} -+ - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) - { - struct field_offsets -@@ 
-584,48 +691,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - uint32_t offset; - uint32_t type; - }; -- uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; -+ uint32_t name_offset, offset, unpacked_size, packed_size, stride, numeric_desc; - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - struct field_offsets *field_offsets = NULL; -+ const struct hlsl_type *element_type; - struct hlsl_ctx *ctx = fx->ctx; - uint32_t elements_count = 0; - const char *name; - size_t i; - -- /* Resolve arrays to element type and number of elements. */ - if (type->class == HLSL_CLASS_ARRAY) -- { - elements_count = hlsl_get_multiarray_size(type); -- type = hlsl_get_multiarray_element_type(type); -- } -+ element_type = hlsl_get_multiarray_element_type(type); - -- name = get_fx_4_type_name(type); -+ name = get_fx_4_type_name(element_type); - - name_offset = write_string(name, fx); -- if (type->class == HLSL_CLASS_STRUCT) -+ if (element_type->class == HLSL_CLASS_STRUCT) - { -- if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) -+ if (!(field_offsets = hlsl_calloc(ctx, element_type->e.record.field_count, sizeof(*field_offsets)))) - return 0; - -- for (i = 0; i < type->e.record.field_count; ++i) -+ for (i = 0; i < element_type->e.record.field_count; ++i) - { -- const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ const struct hlsl_struct_field *field = &element_type->e.record.fields[i]; - - field_offsets[i].name = write_string(field->name, fx); - field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); -- field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; -+ field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float); - field_offsets[i].type = write_type(field->type, fx); - } - } - - offset = put_u32_unaligned(buffer, name_offset); - -- switch (type->class) -+ switch (element_type->class) - { - case 
HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -- put_u32_unaligned(buffer, 1); -+ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_NUMERIC); - break; - - case HLSL_CLASS_DEPTH_STENCIL_STATE: -@@ -643,48 +748,50 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_STRING: -- put_u32_unaligned(buffer, 2); -+ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_OBJECT); - break; - - case HLSL_CLASS_STRUCT: -- put_u32_unaligned(buffer, 3); -+ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_STRUCT); - break; - - case HLSL_CLASS_ARRAY: - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_NULL: -+ case HLSL_CLASS_STREAM_OUTPUT: - vkd3d_unreachable(); - - case HLSL_CLASS_VOID: -- FIXME("Writing type class %u is not implemented.\n", type->class); -+ FIXME("Writing type class %u is not implemented.\n", element_type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - return 0; - } - - /* Structures can only contain numeric fields, this is validated during variable declaration. 
*/ -- total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ unpacked_size = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ - packed_size = 0; -- if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) -- packed_size = hlsl_type_component_count(type) * sizeof(float); -+ if (is_numeric_fx_4_type(element_type)) -+ packed_size = hlsl_type_component_count(element_type) * sizeof(float); - if (elements_count) -- { -- total_size *= elements_count; - packed_size *= elements_count; -- } -+ -+ stride = element_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); - stride = align(stride, 4 * sizeof(float)); - - put_u32_unaligned(buffer, elements_count); -- put_u32_unaligned(buffer, total_size); -+ put_u32_unaligned(buffer, unpacked_size); - put_u32_unaligned(buffer, stride); - put_u32_unaligned(buffer, packed_size); - -- if (type->class == HLSL_CLASS_STRUCT) -+ if (element_type->class == HLSL_CLASS_STRUCT) - { -- put_u32_unaligned(buffer, type->e.record.field_count); -- for (i = 0; i < type->e.record.field_count; ++i) -+ put_u32_unaligned(buffer, element_type->e.record.field_count); -+ for (i = 0; i < element_type->e.record.field_count; ++i) - { - const struct field_offsets *field = &field_offsets[i]; - -@@ -700,95 +807,96 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, 0); /* Interface count */ - } - } -- else if (type->class == HLSL_CLASS_TEXTURE) -+ else if (element_type->class == HLSL_CLASS_TEXTURE) - { - static const uint32_t texture_type[] = - { -- [HLSL_SAMPLER_DIM_GENERIC] = 9, -- [HLSL_SAMPLER_DIM_1D] = 10, -- [HLSL_SAMPLER_DIM_1DARRAY] = 11, -- [HLSL_SAMPLER_DIM_2D] = 12, -- [HLSL_SAMPLER_DIM_2DARRAY] = 13, -- [HLSL_SAMPLER_DIM_2DMS] = 14, -- [HLSL_SAMPLER_DIM_2DMSARRAY] = 15, -- [HLSL_SAMPLER_DIM_3D] = 16, -- [HLSL_SAMPLER_DIM_CUBE] = 17, -- [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, -+ [HLSL_SAMPLER_DIM_GENERIC] = FX_4_OBJECT_TYPE_TEXTURE, -+ [HLSL_SAMPLER_DIM_1D] = 
FX_4_OBJECT_TYPE_TEXTURE_1D, -+ [HLSL_SAMPLER_DIM_1DARRAY] = FX_4_OBJECT_TYPE_TEXTURE_1DARRAY, -+ [HLSL_SAMPLER_DIM_2D] = FX_4_OBJECT_TYPE_TEXTURE_2D, -+ [HLSL_SAMPLER_DIM_2DARRAY] = FX_4_OBJECT_TYPE_TEXTURE_2DARRAY, -+ [HLSL_SAMPLER_DIM_2DMS] = FX_4_OBJECT_TYPE_TEXTURE_2DMS, -+ [HLSL_SAMPLER_DIM_2DMSARRAY] = FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY, -+ [HLSL_SAMPLER_DIM_3D] = FX_4_OBJECT_TYPE_TEXTURE_3D, -+ [HLSL_SAMPLER_DIM_CUBE] = FX_4_OBJECT_TYPE_TEXTURE_CUBE, -+ [HLSL_SAMPLER_DIM_CUBEARRAY] = FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY, - }; - -- put_u32_unaligned(buffer, texture_type[type->sampler_dim]); -+ put_u32_unaligned(buffer, texture_type[element_type->sampler_dim]); - } -- else if (type->class == HLSL_CLASS_SAMPLER) -+ else if (element_type->class == HLSL_CLASS_SAMPLER) - { -- put_u32_unaligned(buffer, 21); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_SAMPLER_STATE); - } -- else if (type->class == HLSL_CLASS_UAV) -+ else if (element_type->class == HLSL_CLASS_UAV) - { - static const uint32_t uav_type[] = - { -- [HLSL_SAMPLER_DIM_1D] = 31, -- [HLSL_SAMPLER_DIM_1DARRAY] = 32, -- [HLSL_SAMPLER_DIM_2D] = 33, -- [HLSL_SAMPLER_DIM_2DARRAY] = 34, -- [HLSL_SAMPLER_DIM_3D] = 35, -- [HLSL_SAMPLER_DIM_BUFFER] = 36, -- [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, -+ [HLSL_SAMPLER_DIM_1D] = FX_5_OBJECT_TYPE_UAV_1D, -+ [HLSL_SAMPLER_DIM_1DARRAY] = FX_5_OBJECT_TYPE_UAV_1DARRAY, -+ [HLSL_SAMPLER_DIM_2D] = FX_5_OBJECT_TYPE_UAV_2D, -+ [HLSL_SAMPLER_DIM_2DARRAY] = FX_5_OBJECT_TYPE_UAV_2DARRAY, -+ [HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D, -+ [HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER, -+ [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER, -+ [HLSL_SAMPLER_DIM_RAW_BUFFER] = FX_5_OBJECT_TYPE_UAV_RAW_BUFFER, - }; - -- put_u32_unaligned(buffer, uav_type[type->sampler_dim]); -+ put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); - } -- else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) -+ else if (element_type->class == 
HLSL_CLASS_DEPTH_STENCIL_VIEW) - { -- put_u32_unaligned(buffer, 20); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV); - } -- else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) -+ else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) - { -- put_u32_unaligned(buffer, 19); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV); - } -- else if (type->class == HLSL_CLASS_PIXEL_SHADER) -+ else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) - { -- put_u32_unaligned(buffer, 5); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_PIXEL_SHADER); - } -- else if (type->class == HLSL_CLASS_VERTEX_SHADER) -+ else if (element_type->class == HLSL_CLASS_VERTEX_SHADER) - { -- put_u32_unaligned(buffer, 6); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_VERTEX_SHADER); - } -- else if (type->class == HLSL_CLASS_RASTERIZER_STATE) -+ else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) - { -- put_u32_unaligned(buffer, 4); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RASTERIZER_STATE); - } -- else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) -+ else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) - { -- put_u32_unaligned(buffer, 3); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE); - } -- else if (type->class == HLSL_CLASS_BLEND_STATE) -+ else if (element_type->class == HLSL_CLASS_BLEND_STATE) - { -- put_u32_unaligned(buffer, 2); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_BLEND_STATE); - } -- else if (type->class == HLSL_CLASS_STRING) -+ else if (element_type->class == HLSL_CLASS_STRING) - { -- put_u32_unaligned(buffer, 1); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_STRING); - } -- else if (hlsl_is_numeric_type(type)) -+ else if (hlsl_is_numeric_type(element_type)) - { -- numeric_desc = get_fx_4_numeric_type_description(type, fx); -+ numeric_desc = get_fx_4_numeric_type_description(element_type, fx); - put_u32_unaligned(buffer, numeric_desc); - } -- else if (type->class == HLSL_CLASS_COMPUTE_SHADER) -+ else if 
(element_type->class == HLSL_CLASS_COMPUTE_SHADER) - { -- put_u32_unaligned(buffer, 28); -+ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_COMPUTE_SHADER); - } -- else if (type->class == HLSL_CLASS_HULL_SHADER) -+ else if (element_type->class == HLSL_CLASS_HULL_SHADER) - { -- put_u32_unaligned(buffer, 29); -+ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_HULL_SHADER); - } -- else if (type->class == HLSL_CLASS_DOMAIN_SHADER) -+ else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER) - { -- put_u32_unaligned(buffer, 30); -+ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_DOMAIN_SHADER); - } - else - { -- FIXME("Type %u is not supported.\n", type->class); -+ FIXME("Type %u is not supported.\n", element_type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - } - -@@ -963,16 +1071,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - - static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -+ uint32_t name_offset, pass_count_offset, annotation_count_offset, count = 0; - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t name_offset, count_offset, count = 0; - struct hlsl_ir_var *pass; - - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); -- put_u32(buffer, 0); /* Annotation count. */ -- count_offset = put_u32(buffer, 0); /* Pass count. 
*/ -+ annotation_count_offset = put_u32(buffer, 0); -+ pass_count_offset = put_u32(buffer, 0); - -- /* FIXME: annotations */ -+ write_fx_2_annotations(var, annotation_count_offset, fx); - - LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) - { -@@ -980,47 +1088,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex - ++count; - } - -- set_u32(buffer, count_offset, count); -+ set_u32(buffer, pass_count_offset, count); - } - --static uint32_t get_fx_2_type_size(const struct hlsl_type *type) -+static uint32_t write_fx_2_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, -+ struct fx_write_context *fx) - { -- uint32_t size = 0, elements_count; -- size_t i; -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); -+ uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t offset = buffer->size; -+ unsigned int comp_count; - -- if (type->class == HLSL_CLASS_ARRAY) -+ if (!value) -+ return 0; -+ -+ comp_count = hlsl_type_component_count(type); -+ -+ for (i = 0; i < elements_count; ++i) - { -- elements_count = hlsl_get_multiarray_size(type); -- type = hlsl_get_multiarray_element_type(type); -- return get_fx_2_type_size(type) * elements_count; -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ { -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ -+ for (j = 0; j < comp_count; ++j) -+ { -+ put_u32(buffer, value->number.u); -+ value++; -+ } -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", -+ type->e.numeric.type); -+ } -+ -+ break; -+ } -+ case HLSL_CLASS_STRUCT: -+ { -+ struct hlsl_struct_field *fields = 
type->e.record.fields; -+ -+ for (j = 0; j < type->e.record.field_count; ++j) -+ { -+ write_fx_2_default_value(fields[i].type, value, fx); -+ value += hlsl_type_component_count(fields[i].type); -+ } -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); -+ } - } -- else if (type->class == HLSL_CLASS_STRUCT) -+ -+ return offset; -+} -+ -+static uint32_t write_fx_2_object_initializer(const struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ unsigned int i, elements_count = hlsl_get_multiarray_size(var->data_type); -+ struct vkd3d_bytecode_buffer *buffer = &fx->objects; -+ uint32_t offset = fx->unstructured.size, id, size; -+ struct hlsl_ctx *ctx = fx->ctx; -+ const void *data; -+ -+ for (i = 0; i < elements_count; ++i) - { -- for (i = 0; i < type->e.record.field_count; ++i) -+ if (type->class == HLSL_CLASS_SAMPLER) - { -- const struct hlsl_struct_field *field = &type->e.record.fields[i]; -- size += get_fx_2_type_size(field->type); -+ hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 sampler objects initializers is not implemented."); - } -+ else -+ { -+ switch (type->class) -+ { -+ case HLSL_CLASS_STRING: -+ { -+ const char *string = var->default_values[i].string ? 
var->default_values[i].string : ""; -+ size = strlen(string) + 1; -+ data = string; -+ break; -+ } -+ case HLSL_CLASS_TEXTURE: -+ size = 0; -+ break; -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_VERTEX_SHADER: -+ size = 0; -+ hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 shader objects initializers is not implemented."); -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ id = fx->object_variable_count++; - -- return size; -+ put_u32(&fx->unstructured, id); -+ -+ put_u32(buffer, id); -+ put_u32(buffer, size); -+ if (size) -+ bytecode_put_bytes(buffer, data, size); -+ } - } - -- return type->dimx * type->dimy * sizeof(float); -+ return offset; - } - - static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) - { -- struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -- const struct hlsl_type *type = var->data_type; -- uint32_t offset, size, elements_count = 1; -- -- size = get_fx_2_type_size(type); -- -- if (type->class == HLSL_CLASS_ARRAY) -- { -- elements_count = hlsl_get_multiarray_size(type); -- type = hlsl_get_multiarray_element_type(type); -- } -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t offset; - - /* Note that struct fields must all be numeric; - * this was validated in check_invalid_object_fields(). 
*/ -@@ -1030,21 +1219,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: -- /* FIXME: write actual initial value */ -- if (var->default_values) -- hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); -- -- offset = put_u32(buffer, 0); -+ offset = write_fx_2_default_value(var->data_type, var->default_values, fx); -+ break; - -- for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) -- put_u32(buffer, 0); -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_VERTEX_SHADER: -+ offset = write_fx_2_object_initializer(var, fx); - break; - - default: -- /* Objects are given sequential ids. */ -- offset = put_u32(buffer, fx->object_variable_count++); -- for (uint32_t i = 1; i < elements_count; ++i) -- put_u32(buffer, fx->object_variable_count++); -+ offset = 0; -+ hlsl_fixme(ctx, &var->loc, "Writing initializer not implemented for parameter class %#x.", type->class); - break; - } - -@@ -1070,6 +1258,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - return is_type_supported_fx_2(ctx, type->e.array.type, loc); - - case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: -@@ -1083,9 +1272,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - } - break; - -- case HLSL_CLASS_PIXEL_SHADER: -- case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ return true; -+ -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_VERTEX_SHADER: - hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); - return false; -@@ -1104,10 +1294,12 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - return false; - - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case 
HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_NULL: -+ case HLSL_CLASS_STREAM_OUTPUT: - /* This cannot appear as an extern variable. */ - break; - } -@@ -1117,8 +1309,8 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - - static void write_fx_2_parameters(struct fx_write_context *fx) - { -+ uint32_t desc_offset, value_offset, flags, annotation_count_offset; - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t desc_offset, value_offset, flags; - struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_var *var; - enum fx_2_parameter_flags -@@ -1138,23 +1330,35 @@ static void write_fx_2_parameters(struct fx_write_context *fx) - if (var->storage_modifiers & HLSL_STORAGE_SHARED) - flags |= IS_SHARED; - -- put_u32(buffer, desc_offset); /* Parameter description */ -- put_u32(buffer, value_offset); /* Value */ -- put_u32(buffer, flags); /* Flags */ -+ put_u32(buffer, desc_offset); -+ put_u32(buffer, value_offset); -+ put_u32(buffer, flags); - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); -+ annotation_count_offset = put_u32(buffer, 0); -+ write_fx_2_annotations(var, annotation_count_offset, fx); - - ++fx->parameter_count; - } - } - -+static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t desc_offset, value_offset; -+ -+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); -+ value_offset = write_fx_2_initial_value(var, fx); -+ -+ put_u32(buffer, desc_offset); -+ put_u32(buffer, value_offset); -+} -+ - static const struct fx_write_context_ops fx_2_ops = - { - .write_string = write_fx_2_string, - .write_technique = write_fx_2_technique, - .write_pass = write_fx_2_pass, -+ .write_annotation = write_fx_2_annotation, - }; - - static int hlsl_fx_2_write(struct hlsl_ctx *ctx, 
struct vkd3d_shader_code *out) -@@ -1180,19 +1384,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - object_count = put_u32(structured, 0); - - write_fx_2_parameters(&fx); -- set_u32(structured, parameter_count, fx.parameter_count); -- set_u32(structured, object_count, fx.object_variable_count); -- - write_techniques(ctx->globals, &fx); -- set_u32(structured, technique_count, fx.technique_count); -- set_u32(structured, shader_count, fx.shader_count); -- -- put_u32(structured, 0); /* String count */ -+ put_u32(structured, fx.object_variable_count - 1); - put_u32(structured, 0); /* Resource count */ - -- /* TODO: strings */ -+ bytecode_put_bytes(structured, fx.objects.data, fx.objects.size); - /* TODO: resources */ - -+ set_u32(structured, parameter_count, fx.parameter_count); -+ set_u32(structured, object_count, fx.object_variable_count); -+ set_u32(structured, technique_count, fx.technique_count); -+ set_u32(structured, shader_count, fx.shader_count); -+ - size = align(fx.unstructured.size, 4); - set_u32(&buffer, offset, size); - -@@ -1201,6 +1404,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - vkd3d_free(fx.unstructured.data); - vkd3d_free(fx.structured.data); -+ vkd3d_free(fx.objects.data); - - if (!fx.technique_count) - hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); -@@ -1252,6 +1456,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl - switch (type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -@@ -1412,20 +1617,17 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s - - for (i = 0; i < count; ++i) - { -- if (hlsl_is_numeric_type(data_type)) -+ switch (data_type->e.numeric.type) - { -- switch (data_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_INT: -- case 
HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- type = fx_4_numeric_base_type[data_type->e.numeric.type]; -- break; -- default: -- type = 0; -- hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); -- } -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ type = fx_4_numeric_base_types[data_type->e.numeric.type]; -+ break; -+ default: -+ type = 0; -+ hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); - } - - put_u32_unaligned(buffer, type); -@@ -1438,11 +1640,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s - static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, - struct fx_write_context *fx) - { -- uint32_t value_offset = 0, assignment_type = 0, rhs_offset; -- uint32_t type_offset; -+ uint32_t value_offset = 0, assignment_type = 0, rhs_offset, type_offset, offset; -+ struct vkd3d_bytecode_buffer *unstructured = &fx->unstructured; - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_node *value = entry->args->node; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_var *index_var; -+ struct hlsl_ir_constant *c; -+ struct hlsl_ir_load *load; - - put_u32(buffer, entry->name_id); - put_u32(buffer, entry->lhs_index); -@@ -1453,21 +1658,77 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - { - case HLSL_IR_CONSTANT: - { -- struct hlsl_ir_constant *c = hlsl_ir_constant(value); -+ c = hlsl_ir_constant(value); - - value_offset = write_fx_4_state_numeric_value(c, fx); -- assignment_type = 1; -+ assignment_type = FX_4_ASSIGNMENT_CONSTANT; - break; - } - case HLSL_IR_LOAD: - { -- struct hlsl_ir_load *l = hlsl_ir_load(value); -+ load = hlsl_ir_load(value); - -- if (l->src.path_len) -+ if (load->src.path_len) - hlsl_fixme(ctx, &var->loc, "Indexed access 
in RHS values is not implemented."); - -- value_offset = write_fx_4_string(l->src.var->name, fx); -- assignment_type = 2; -+ value_offset = write_fx_4_string(load->src.var->name, fx); -+ assignment_type = FX_4_ASSIGNMENT_VARIABLE; -+ break; -+ } -+ case HLSL_IR_INDEX: -+ { -+ struct hlsl_ir_index *index = hlsl_ir_index(value); -+ struct hlsl_ir_node *val = index->val.node; -+ struct hlsl_ir_node *idx = index->idx.node; -+ struct hlsl_type *type; -+ -+ if (val->type != HLSL_IR_LOAD) -+ { -+ hlsl_fixme(ctx, &var->loc, "Unexpected indexed RHS value type."); -+ break; -+ } -+ -+ load = hlsl_ir_load(val); -+ value_offset = write_fx_4_string(load->src.var->name, fx); -+ type = load->src.var->data_type; -+ -+ switch (idx->type) -+ { -+ case HLSL_IR_CONSTANT: -+ { -+ c = hlsl_ir_constant(idx); -+ value_offset = put_u32(unstructured, value_offset); -+ put_u32(unstructured, c->value.u[0].u); -+ assignment_type = FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX; -+ -+ if (c->value.u[0].u >= type->e.array.elements_count) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Array index %u exceeds array size %u.", c->value.u[0].u, type->e.array.elements_count); -+ break; -+ } -+ -+ case HLSL_IR_LOAD: -+ { -+ load = hlsl_ir_load(idx); -+ index_var = load->src.var; -+ -+ /* Special case for uint index variables, for anything more complex use an expression. 
*/ -+ if (hlsl_types_are_equal(index_var->data_type, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT)) -+ && !load->src.path_len) -+ { -+ offset = write_fx_4_string(index_var->name, fx); -+ -+ value_offset = put_u32(unstructured, value_offset); -+ put_u32(unstructured, offset); -+ assignment_type = FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX; -+ break; -+ } -+ } -+ /* fall through */ -+ -+ default: -+ hlsl_fixme(ctx, &var->loc, "Complex array index expressions in RHS values are not implemented."); -+ } - break; - } - default: -@@ -1575,6 +1836,7 @@ enum state_property_component_type - FX_BLEND, - FX_VERTEXSHADER, - FX_PIXELSHADER, -+ FX_COMPONENT_TYPE_COUNT, - }; - - static inline bool is_object_fx_type(enum state_property_component_type type) -@@ -1645,230 +1907,227 @@ static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_com - } - } - --static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -- struct fx_write_context *fx) --{ -- static const struct rhs_named_value filter_values[] = -- { -- { "MIN_MAG_MIP_POINT", 0x00 }, -- { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, -- { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, -- { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, -- { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, -- { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, -- { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, -- { "MIN_MAG_MIP_LINEAR", 0x15 }, -- { "ANISOTROPIC", 0x55 }, -- { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, -- { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, -- { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, -- { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, -- { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, -- { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, -- { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, -- { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, -- { "COMPARISON_ANISOTROPIC", 0xd5 }, -- { NULL }, -- }; -- -- static const struct rhs_named_value address_values[] = -- { -- { "WRAP", 1 }, -- { "MIRROR", 2 }, 
-- { "CLAMP", 3 }, -- { "BORDER", 4 }, -- { "MIRROR_ONCE", 5 }, -- { NULL }, -- }; -+static const struct rhs_named_value filter_values[] = -+{ -+ { "MIN_MAG_MIP_POINT", 0x00 }, -+ { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, -+ { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, -+ { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, -+ { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, -+ { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, -+ { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, -+ { "MIN_MAG_MIP_LINEAR", 0x15 }, -+ { "ANISOTROPIC", 0x55 }, -+ { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, -+ { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, -+ { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, -+ { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, -+ { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, -+ { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, -+ { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, -+ { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, -+ { "COMPARISON_ANISOTROPIC", 0xd5 }, -+ { NULL }, -+}; - -- static const struct rhs_named_value compare_func_values[] = -- { -- { "NEVER", 1 }, -- { "LESS", 2 }, -- { "EQUAL", 3 }, -- { "LESS_EQUAL", 4 }, -- { "GREATER", 5 }, -- { "NOT_EQUAL", 6 }, -- { "GREATER_EQUAL", 7 }, -- { "ALWAYS", 8 }, -- { NULL } -- }; -+static const struct rhs_named_value address_values[] = -+{ -+ { "WRAP", 1 }, -+ { "MIRROR", 2 }, -+ { "CLAMP", 3 }, -+ { "BORDER", 4 }, -+ { "MIRROR_ONCE", 5 }, -+ { NULL }, -+}; - -- static const struct rhs_named_value depth_write_mask_values[] = -- { -- { "ZERO", 0 }, -- { "ALL", 1 }, -- { NULL } -- }; -+static const struct rhs_named_value compare_func_values[] = -+{ -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+}; - -- static const struct rhs_named_value comparison_values[] = -- { -- { "NEVER", 1 }, -- { "LESS", 2 }, -- { "EQUAL", 3 }, -- { "LESS_EQUAL", 4 }, -- { "GREATER", 5 }, -- { "NOT_EQUAL", 6 }, -- { 
"GREATER_EQUAL", 7 }, -- { "ALWAYS", 8 }, -- { NULL } -- }; -+static const struct rhs_named_value depth_write_mask_values[] = -+{ -+ { "ZERO", 0 }, -+ { "ALL", 1 }, -+ { NULL } -+}; - -- static const struct rhs_named_value stencil_op_values[] = -- { -- { "KEEP", 1 }, -- { "ZERO", 2 }, -- { "REPLACE", 3 }, -- { "INCR_SAT", 4 }, -- { "DECR_SAT", 5 }, -- { "INVERT", 6 }, -- { "INCR", 7 }, -- { "DECR", 8 }, -- { NULL } -- }; -+static const struct rhs_named_value comparison_values[] = -+{ -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+}; - -- static const struct rhs_named_value fill_values[] = -- { -- { "WIREFRAME", 2 }, -- { "SOLID", 3 }, -- { NULL } -- }; -+static const struct rhs_named_value stencil_op_values[] = -+{ -+ { "KEEP", 1 }, -+ { "ZERO", 2 }, -+ { "REPLACE", 3 }, -+ { "INCR_SAT", 4 }, -+ { "DECR_SAT", 5 }, -+ { "INVERT", 6 }, -+ { "INCR", 7 }, -+ { "DECR", 8 }, -+ { NULL } -+}; - -- static const struct rhs_named_value cull_values[] = -- { -- { "NONE", 1 }, -- { "FRONT", 2 }, -- { "BACK", 3 }, -- { NULL } -- }; -+static const struct rhs_named_value fill_values[] = -+{ -+ { "WIREFRAME", 2 }, -+ { "SOLID", 3 }, -+ { NULL } -+}; - -- static const struct rhs_named_value blend_values[] = -- { -- { "ZERO", 1 }, -- { "ONE", 2 }, -- { "SRC_COLOR", 3 }, -- { "INV_SRC_COLOR", 4 }, -- { "SRC_ALPHA", 5 }, -- { "INV_SRC_ALPHA", 6 }, -- { "DEST_ALPHA", 7 }, -- { "INV_DEST_ALPHA", 8 }, -- { "DEST_COLOR", 9 }, -- { "INV_DEST_COLOR", 10 }, -- { "SRC_ALPHA_SAT", 11 }, -- { "BLEND_FACTOR", 14 }, -- { "INV_BLEND_FACTOR", 15 }, -- { "SRC1_COLOR", 16 }, -- { "INV_SRC1_COLOR", 17 }, -- { "SRC1_ALPHA", 18 }, -- { "INV_SRC1_ALPHA", 19 }, -- { NULL } -- }; -+static const struct rhs_named_value cull_values[] = -+{ -+ { "NONE", 1 }, -+ { "FRONT", 2 }, -+ { "BACK", 3 }, -+ { NULL } -+}; - -- static const struct rhs_named_value blendop_values[] = -- { 
-- { "ADD", 1 }, -- { "SUBTRACT", 2 }, -- { "REV_SUBTRACT", 3 }, -- { "MIN", 4 }, -- { "MAX", 5 }, -- { NULL } -- }; -+static const struct rhs_named_value blend_values[] = -+{ -+ { "ZERO", 1 }, -+ { "ONE", 2 }, -+ { "SRC_COLOR", 3 }, -+ { "INV_SRC_COLOR", 4 }, -+ { "SRC_ALPHA", 5 }, -+ { "INV_SRC_ALPHA", 6 }, -+ { "DEST_ALPHA", 7 }, -+ { "INV_DEST_ALPHA", 8 }, -+ { "DEST_COLOR", 9 }, -+ { "INV_DEST_COLOR", 10 }, -+ { "SRC_ALPHA_SAT", 11 }, -+ { "BLEND_FACTOR", 14 }, -+ { "INV_BLEND_FACTOR", 15 }, -+ { "SRC1_COLOR", 16 }, -+ { "INV_SRC1_COLOR", 17 }, -+ { "SRC1_ALPHA", 18 }, -+ { "INV_SRC1_ALPHA", 19 }, -+ { NULL } -+}; - -- static const struct rhs_named_value bool_values[] = -- { -- { "FALSE", 0 }, -- { "TRUE", 1 }, -- { NULL } -- }; -+static const struct rhs_named_value blendop_values[] = -+{ -+ { "ADD", 1 }, -+ { "SUBTRACT", 2 }, -+ { "REV_SUBTRACT", 3 }, -+ { "MIN", 4 }, -+ { "MAX", 5 }, -+ { NULL } -+}; - -- static const struct rhs_named_value null_values[] = -- { -- { "NULL", 0 }, -- { NULL } -- }; -+static const struct rhs_named_value bool_values[] = -+{ -+ { "FALSE", 0 }, -+ { "TRUE", 1 }, -+ { NULL } -+}; - -- static const struct state -- { -- const char *name; -- enum hlsl_type_class container; -- enum hlsl_type_class class; -- enum state_property_component_type type; -- unsigned int dimx; -- unsigned int array_size; -- uint32_t id; -- const struct rhs_named_value *values; -- } -- states[] = -- { -- { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, -- { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, -- { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, -- { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, -- { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, -- -- { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, -- { "PixelShader", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, -- { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, -- { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, -- { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, -- -- { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, -- { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, -- { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, -- { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, -- { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, -- { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, -- { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, -- { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, -- { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, -- { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, -- -- { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, -- { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, -- { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, -- { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, -- { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, -- { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, -- { "FrontFaceStencilFail", 
HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, -- { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, -- { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, -- { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, -- { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, -- { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, -- { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, -- { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, -- -- { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, -- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, -- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, -- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, -- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, -- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, -- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, -- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, -- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, -- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, -- { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, -- -- { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, -- { 
"DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, -- { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, -- }; -+static const struct rhs_named_value null_values[] = -+{ -+ { "NULL", 0 }, -+ { NULL } -+}; - -- static const struct state fx_4_blend_states[] = -- { -- { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, -- { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -- { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, -- { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, -- { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, -- { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, -- { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, -- { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, -- { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, -- }; -+static const struct fx_4_state -+{ -+ const char *name; -+ enum hlsl_type_class container; -+ enum hlsl_type_class class; -+ enum state_property_component_type type; -+ unsigned int dimx; -+ unsigned int array_size; -+ int id; -+ const struct rhs_named_value *values; -+} -+fx_4_states[] = -+{ -+ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, -+ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, -+ { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, -+ { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, -+ { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, -+ -+ { "VertexShader", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, -+ { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, -+ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, -+ { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, -+ { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, -+ -+ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, -+ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, -+ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, -+ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, -+ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, -+ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, -+ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, -+ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, -+ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, -+ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, -+ -+ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, -+ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, -+ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, -+ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, -+ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, -+ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, 
HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, -+ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, -+ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, -+ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, -+ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, -+ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, -+ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, -+ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, -+ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, -+ -+ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, -+ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -+ { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, -+ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, -+ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, -+ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, -+ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, -+ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, -+ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, -+ -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, -+ { 
"AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, -+ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, -+ -+ { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, -+ { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, -+ { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, -+}; - -- static const struct state fx_5_blend_states[] = -+static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -+ struct fx_write_context *fx) -+{ -+ static const struct fx_4_state fx_5_blend_states[] = - { - { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, - { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -@@ -1883,36 +2142,28 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - - struct state_table - { -- const struct state *ptr; -+ const struct fx_4_state *ptr; - unsigned int count; - } table; - - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - struct replace_state_context replace_context; -+ const struct fx_4_state *state = NULL; - struct 
hlsl_type *state_type = NULL; - struct hlsl_ir_node *node, *cast; -- const struct state *state = NULL; - struct hlsl_ctx *ctx = fx->ctx; - enum hlsl_base_type base_type; - unsigned int i; - -- if (type->class == HLSL_CLASS_BLEND_STATE) -+ if (type->class == HLSL_CLASS_BLEND_STATE && ctx->profile->major_version == 5) - { -- if (ctx->profile->major_version == 4) -- { -- table.ptr = fx_4_blend_states; -- table.count = ARRAY_SIZE(fx_4_blend_states); -- } -- else -- { -- table.ptr = fx_5_blend_states; -- table.count = ARRAY_SIZE(fx_5_blend_states); -- } -+ table.ptr = fx_5_blend_states; -+ table.count = ARRAY_SIZE(fx_5_blend_states); - } - else - { -- table.ptr = states; -- table.count = ARRAY_SIZE(states); -+ table.ptr = fx_4_states; -+ table.count = ARRAY_SIZE(fx_4_states); - } - - for (i = 0; i < table.count; ++i) -@@ -2118,7 +2369,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, - const struct function_component *comp = &components[i]; - unsigned int arg_index = (i + 1) % entry->args_count; - block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, -- comp->lhs_has_index, comp->lhs_index, arg_index); -+ comp->lhs_has_index, comp->lhs_index, true, arg_index); - } - hlsl_free_state_block_entry(entry); - -@@ -2126,7 +2377,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, - } - - /* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState -- object, and only when fx_5_0 profile is used. */ -+ object, and only when fx_4_1 or fx_5_0 profile is used. 
*/ - static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, - unsigned int entry_index, struct fx_write_context *fx) - { -@@ -2140,7 +2391,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * - - if (type->class != HLSL_CLASS_BLEND_STATE) - return 1; -- if (ctx->profile->major_version != 5) -+ if (hlsl_version_lt(ctx, 4, 1)) - return 1; - if (entry->lhs_has_index) - return 1; -@@ -2164,7 +2415,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * - for (i = 1; i < array_size; ++i) - { - block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, -- entry->name, true, i, 0); -+ entry->name, true, i, true, 0); - } - - return array_size; -@@ -2401,6 +2652,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - size = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -+ if (!is_numeric_fx_4_type(var->data_type)) -+ continue; -+ - if (var->buffer != b) - continue; - -@@ -2629,3 +2883,949 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - vkd3d_unreachable(); - } - } -+ -+struct fx_parser -+{ -+ const uint8_t *ptr, *start, *end; -+ struct vkd3d_shader_message_context *message_context; -+ struct vkd3d_string_buffer buffer; -+ unsigned int indent; -+ unsigned int version; -+ struct -+ { -+ const uint8_t *ptr; -+ const uint8_t *end; -+ uint32_t size; -+ } unstructured; -+ uint32_t buffer_count; -+ uint32_t object_count; -+ uint32_t group_count; -+ bool failed; -+}; -+ -+static uint32_t fx_parser_read_u32(struct fx_parser *parser) -+{ -+ uint32_t ret; -+ -+ if ((parser->end - parser->ptr) < sizeof(uint32_t)) -+ { -+ parser->failed = true; -+ return 0; -+ } -+ -+ ret = *(uint32_t *)parser->ptr; -+ parser->ptr += sizeof(uint32_t); -+ -+ return ret; -+} -+ -+static void fx_parser_read_u32s(struct fx_parser *parser, void *dst, size_t size) -+{ 
-+ uint32_t *ptr = dst; -+ size_t i; -+ -+ for (i = 0; i < size / sizeof(uint32_t); ++i) -+ ptr[i] = fx_parser_read_u32(parser); -+} -+ -+static void fx_parser_skip(struct fx_parser *parser, size_t size) -+{ -+ if ((parser->end - parser->ptr) < size) -+ { -+ parser->ptr = parser->end; -+ parser->failed = true; -+ return; -+ } -+ parser->ptr += size; -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, enum vkd3d_shader_error error, -+ const char *format, ...) -+{ -+ va_list args; -+ -+ va_start(args, format); -+ vkd3d_shader_verror(parser->message_context, NULL, error, format, args); -+ va_end(args); -+ -+ parser->failed = true; -+} -+ -+static int fx_2_parse(struct fx_parser *parser) -+{ -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); -+ -+ return -1; -+} -+ -+static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint32_t offset, size_t size) -+{ -+ const uint8_t *ptr = parser->unstructured.ptr; -+ -+ if (offset >= parser->unstructured.size -+ || size > parser->unstructured.size - offset) -+ { -+ parser->failed = true; -+ return NULL; -+ } -+ -+ return &ptr[offset]; -+} -+ -+static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) -+{ -+ const uint8_t *ptr; -+ -+ memset(dst, 0, size); -+ if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size))) -+ return; -+ -+ memcpy(dst, ptr, size); -+} -+ -+static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) -+{ -+ const uint8_t *ptr = parser->unstructured.ptr; -+ const uint8_t *end = parser->unstructured.end; -+ -+ if (offset >= parser->unstructured.size) -+ { -+ parser->failed = true; -+ return "<invalid>"; -+ } -+ -+ ptr += offset; -+ -+ while (ptr < end && *ptr) -+ ++ptr; -+ -+ if (*ptr) -+ { -+ parser->failed = true; -+ return "<invalid>"; -+ } -+ -+ return (const char *)(parser->unstructured.ptr + offset); -+} -+ 
-+static void parse_fx_start_indent(struct fx_parser *parser) -+{ -+ ++parser->indent; -+} -+ -+static void parse_fx_end_indent(struct fx_parser *parser) -+{ -+ --parser->indent; -+} -+ -+static void parse_fx_print_indent(struct fx_parser *parser) -+{ -+ vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); -+} -+ -+static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, -+ const struct fx_4_binary_type *type) -+{ -+ unsigned int base_type, comp_count; -+ size_t i; -+ -+ base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; -+ -+ comp_count = type->packed_size / sizeof(uint32_t); -+ for (i = 0; i < comp_count; ++i) -+ { -+ union hlsl_constant_value_component value; -+ -+ fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); -+ -+ if (base_type == FX_4_NUMERIC_TYPE_FLOAT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); -+ else if (base_type == FX_4_NUMERIC_TYPE_INT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); -+ else if (base_type == FX_4_NUMERIC_TYPE_UINT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -+ else if (base_type == FX_4_NUMERIC_TYPE_BOOL) -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? 
"true" : "false" ); -+ else -+ vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); -+ -+ if (i < comp_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ -+ offset += sizeof(uint32_t); -+ } -+} -+ -+static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) -+{ -+ const char *str = fx_4_get_string(parser, offset); -+ vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); -+} -+ -+static void fx_parse_fx_4_annotations(struct fx_parser *parser) -+{ -+ struct fx_4_annotation -+ { -+ uint32_t name; -+ uint32_t type; -+ } var; -+ struct fx_4_binary_type type; -+ const char *name, *type_name; -+ uint32_t count, i, value; -+ -+ if (parser->failed) -+ return; -+ -+ count = fx_parser_read_u32(parser); -+ -+ if (!count) -+ return; -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "<\n"); -+ parse_fx_start_indent(parser); -+ -+ for (i = 0; i < count; ++i) -+ { -+ fx_parser_read_u32s(parser, &var, sizeof(var)); -+ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); -+ -+ name = fx_4_get_string(parser, var.name); -+ type_name = fx_4_get_string(parser, type.name); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ vkd3d_string_buffer_printf(&parser->buffer, " = "); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "{ "); -+ -+ if (type.class == FX_4_TYPE_CLASS_NUMERIC) -+ { -+ value = fx_parser_read_u32(parser); -+ parse_fx_4_numeric_value(parser, value, &type); -+ } -+ else if (type.class == FX_4_TYPE_CLASS_OBJECT && type.typeinfo == FX_4_OBJECT_TYPE_STRING) -+ { -+ uint32_t element_count = max(type.element_count, 1); -+ -+ for (uint32_t j = 0; j < element_count; ++j) -+ { -+ value = fx_parser_read_u32(parser); -+ 
fx_4_parse_string_initializer(parser, value); -+ if (j < element_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ } -+ } -+ else -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Only numeric and string types are supported in annotations.\n"); -+ } -+ -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, " }"); -+ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); -+ } -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, ">"); -+} -+ -+static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) -+{ -+ struct fx_4_numeric_variable -+ { -+ uint32_t name; -+ uint32_t type; -+ uint32_t semantic; -+ uint32_t offset; -+ uint32_t value; -+ uint32_t flags; -+ } var; -+ const char *name, *semantic, *type_name; -+ struct fx_4_binary_type type; -+ uint32_t i; -+ -+ for (i = 0; i < count; ++i) -+ { -+ fx_parser_read_u32s(parser, &var, sizeof(var)); -+ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); -+ -+ name = fx_4_get_string(parser, var.name); -+ type_name = fx_4_get_string(parser, type.name); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, " %s %s", type_name, name); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ -+ if (var.semantic) -+ { -+ semantic = fx_4_get_string(parser, var.semantic); -+ vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic); -+ } -+ fx_parse_fx_4_annotations(parser); -+ -+ if (var.value) -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, " = { "); -+ parse_fx_4_numeric_value(parser, var.value, &type); -+ vkd3d_string_buffer_printf(&parser->buffer, " }"); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "; // Offset: %u, size %u.\n", var.offset, type.unpacked_size); -+ } -+} -+ -+static void fx_parse_buffers(struct fx_parser *parser) -+{ -+ struct fx_buffer -+ { -+ uint32_t name; -+ uint32_t size; -+ uint32_t 
flags; -+ uint32_t count; -+ uint32_t bind_point; -+ } buffer; -+ const char *name; -+ uint32_t i; -+ -+ if (parser->failed) -+ return; -+ -+ for (i = 0; i < parser->buffer_count; ++i) -+ { -+ fx_parser_read_u32s(parser, &buffer, sizeof(buffer)); -+ -+ name = fx_4_get_string(parser, buffer.name); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s", name); -+ fx_parse_fx_4_annotations(parser); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); -+ parse_fx_start_indent(parser); -+ fx_parse_fx_4_numeric_variables(parser, buffer.count); -+ parse_fx_end_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ } -+} -+ -+static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) -+{ -+ struct vkd3d_shader_compile_info info = { 0 }; -+ struct vkd3d_shader_code output; -+ uint32_t data_size, offset; -+ const void *data = NULL; -+ const char *p, *q, *end; -+ struct fx_5_shader -+ { -+ uint32_t offset; -+ uint32_t sodecl[4]; -+ uint32_t sodecl_count; -+ uint32_t rast_stream; -+ uint32_t iface_bindings_count; -+ uint32_t iface_bindings; -+ } shader5; -+ struct fx_4_gs_so -+ { -+ uint32_t offset; -+ uint32_t sodecl; -+ } gs_so; -+ int ret; -+ -+ static const struct vkd3d_shader_compile_option options[] = -+ { -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, -+ }; -+ -+ switch (object_type) -+ { -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ offset = fx_parser_read_u32(parser); -+ break; -+ -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); -+ offset = gs_so.offset; -+ break; -+ -+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -+ fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); -+ offset = shader5.offset; -+ break; -+ -+ default: -+ 
parser->failed = true; -+ return; -+ } -+ -+ fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); -+ if (data_size) -+ data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); -+ -+ if (!data) -+ return; -+ -+ info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; -+ info.source.code = data; -+ info.source.size = data_size; -+ info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+ info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; -+ info.options = options; -+ info.option_count = ARRAY_SIZE(options); -+ info.log_level = VKD3D_SHADER_LOG_INFO; -+ -+ if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Failed to disassemble shader blob.\n"); -+ return; -+ } -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); -+ -+ parse_fx_start_indent(parser); -+ -+ end = (const char *)output.code + output.size; -+ for (p = output.code; p < end; p = q) -+ { -+ if (!(q = memchr(p, '\n', end - p))) -+ q = end; -+ else -+ ++q; -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); -+ } -+ -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}"); -+ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", -+ fx_4_get_string(parser, gs_so.sodecl)); -+ } -+ else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) -+ { -+ for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) -+ { -+ if (shader5.sodecl[i]) -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", -+ i, fx_4_get_string(parser, shader5.sodecl[i])); -+ } -+ if (shader5.sodecl_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); -+ } -+ -+ 
vkd3d_shader_free_shader_code(&output); -+} -+ -+static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) -+{ -+ switch (type->typeinfo) -+ { -+ case FX_4_OBJECT_TYPE_STRING: -+ case FX_4_OBJECT_TYPE_BLEND_STATE: -+ case FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE: -+ case FX_4_OBJECT_TYPE_RASTERIZER_STATE: -+ case FX_4_OBJECT_TYPE_SAMPLER_STATE: -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static int fx_4_state_id_compare(const void *a, const void *b) -+{ -+ const struct fx_4_state *state = b; -+ int id = *(int *)a; -+ -+ return id - state->id; -+} -+ -+static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32_t count, -+ enum hlsl_type_class type_class) -+{ -+ struct fx_4_assignment -+ { -+ uint32_t id; -+ uint32_t lhs_index; -+ uint32_t type; -+ uint32_t value; -+ } entry; -+ struct -+ { -+ uint32_t name; -+ uint32_t index; -+ } index; -+ struct -+ { -+ uint32_t type; -+ union -+ { -+ uint32_t u; -+ float f; -+ }; -+ } value; -+ static const char *value_types[FX_COMPONENT_TYPE_COUNT] = -+ { -+ [FX_BOOL] = "bool", -+ [FX_FLOAT] = "float", -+ [FX_UINT] = "uint", -+ [FX_UINT8] = "byte", -+ }; -+ const struct rhs_named_value *named_value; -+ uint32_t i, j, comp_count; -+ struct fx_4_state *state; -+ -+ for (i = 0; i < count; ++i) -+ { -+ fx_parser_read_u32s(parser, &entry, sizeof(entry)); -+ -+ if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), -+ sizeof(*fx_4_states), fx_4_state_id_compare))) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); -+ break; -+ } -+ -+ if (state->container != type_class) -+ { -+ 
fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "State '%s' does not belong to object type class %#x.", state->name, type_class); -+ break; -+ } -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", state->name); -+ if (state->array_size > 1) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry.lhs_index); -+ vkd3d_string_buffer_printf(&parser->buffer, " = "); -+ -+ switch (entry.type) -+ { -+ case FX_4_ASSIGNMENT_CONSTANT: -+ -+ if (value_types[state->type]) -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", value_types[state->type]); -+ if (state->dimx > 1) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", state->dimx); -+ vkd3d_string_buffer_printf(&parser->buffer, "("); -+ -+ fx_parser_read_unstructured(parser, &comp_count, entry.value, sizeof(uint32_t)); -+ -+ named_value = NULL; -+ if (comp_count == 1 && state->values && (state->type == FX_UINT || state->type == FX_BOOL)) -+ { -+ const struct rhs_named_value *ptr = state->values; -+ -+ fx_parser_read_unstructured(parser, &value, entry.value + 4, sizeof(value)); -+ -+ while (ptr->name) -+ { -+ if (value.u == ptr->value) -+ { -+ named_value = ptr; -+ break; -+ } -+ ++ptr; -+ } -+ } -+ -+ if (named_value) -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "%s /* %u */", named_value->name, named_value->value); -+ } -+ else -+ { -+ uint32_t offset = entry.value + 4; -+ -+ for (j = 0; j < comp_count; ++j, offset += sizeof(value)) -+ { -+ fx_parser_read_unstructured(parser, &value, offset, sizeof(value)); -+ -+ if (state->type == FX_UINT8) -+ vkd3d_string_buffer_printf(&parser->buffer, "0x%.2x", value.u); -+ else if (state->type == FX_UINT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -+ else if (state->type == FX_FLOAT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%g", value.f); -+ -+ if (comp_count > 1 && j < comp_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ } -+ } -+ -+ 
vkd3d_string_buffer_printf(&parser->buffer, ")"); -+ -+ break; -+ case FX_4_ASSIGNMENT_VARIABLE: -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", fx_4_get_string(parser, entry.value)); -+ break; -+ case FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX: -+ fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index)); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s[%u]", fx_4_get_string(parser, index.name), index.index); -+ break; -+ case FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX: -+ fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index)); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), -+ fx_4_get_string(parser, index.index)); -+ break; -+ default: -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Unsupported assignment type %u.\n", entry.type); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); -+ } -+} -+ -+static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct fx_4_binary_type *type) -+{ -+ static const enum hlsl_type_class type_classes[] = -+ { -+ [FX_4_OBJECT_TYPE_BLEND_STATE] = HLSL_CLASS_BLEND_STATE, -+ [FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE] = HLSL_CLASS_DEPTH_STENCIL_STATE, -+ [FX_4_OBJECT_TYPE_RASTERIZER_STATE] = HLSL_CLASS_RASTERIZER_STATE, -+ [FX_4_OBJECT_TYPE_SAMPLER_STATE] = HLSL_CLASS_SAMPLER, -+ }; -+ unsigned int i, element_count, count; -+ uint32_t value; -+ -+ if (!fx_4_object_has_initializer(type)) -+ return; -+ -+ vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); -+ element_count = max(type->element_count, 1); -+ for (i = 0; i < element_count; ++i) -+ { -+ switch (type->typeinfo) -+ { -+ case FX_4_OBJECT_TYPE_STRING: -+ vkd3d_string_buffer_printf(&parser->buffer, " "); -+ value = fx_parser_read_u32(parser); -+ fx_4_parse_string_initializer(parser, value); -+ break; -+ case FX_4_OBJECT_TYPE_BLEND_STATE: -+ case FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE: -+ case FX_4_OBJECT_TYPE_RASTERIZER_STATE: -+ case 
FX_4_OBJECT_TYPE_SAMPLER_STATE: -+ count = fx_parser_read_u32(parser); -+ -+ parse_fx_start_indent(parser); -+ fx_4_parse_state_object_initializer(parser, count, type_classes[type->typeinfo]); -+ parse_fx_end_indent(parser); -+ break; -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -+ parse_fx_start_indent(parser); -+ fx_4_parse_shader_initializer(parser, type->typeinfo); -+ parse_fx_end_indent(parser); -+ break; -+ default: -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Parsing object type %u is not implemented.", type->typeinfo); -+ return; -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, ",\n"); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "}"); -+} -+ -+static void fx_4_parse_objects(struct fx_parser *parser) -+{ -+ struct fx_4_object_variable -+ { -+ uint32_t name; -+ uint32_t type; -+ uint32_t semantic; -+ uint32_t bind_point; -+ } var; -+ struct fx_4_binary_type type; -+ const char *name, *type_name; -+ uint32_t i; -+ -+ if (parser->failed) -+ return; -+ -+ for (i = 0; i < parser->object_count; ++i) -+ { -+ if (parser->failed) -+ return; -+ -+ fx_parser_read_u32s(parser, &var, sizeof(var)); -+ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); -+ -+ name = fx_4_get_string(parser, var.name); -+ type_name = fx_4_get_string(parser, type.name); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ -+ fx_4_parse_object_initializer(parser, &type); -+ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); -+ -+ fx_parse_fx_4_annotations(parser); -+ } -+} -+ -+static void fx_parse_fx_4_technique(struct fx_parser *parser) 
-+{ -+ struct fx_technique -+ { -+ uint32_t name; -+ uint32_t count; -+ } technique; -+ struct fx_pass -+ { -+ uint32_t name; -+ uint32_t count; -+ } pass; -+ const char *name; -+ uint32_t i; -+ -+ if (parser->failed) -+ return; -+ -+ fx_parser_read_u32s(parser, &technique, sizeof(technique)); -+ -+ name = fx_4_get_string(parser, technique.name); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "technique%u %s", parser->version, name); -+ fx_parse_fx_4_annotations(parser); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -+ -+ parse_fx_start_indent(parser); -+ for (i = 0; i < technique.count; ++i) -+ { -+ fx_parser_read_u32s(parser, &pass, sizeof(pass)); -+ name = fx_4_get_string(parser, pass.name); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name); -+ fx_parse_fx_4_annotations(parser); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -+ -+ parse_fx_start_indent(parser); -+ fx_4_parse_state_object_initializer(parser, pass.count, HLSL_CLASS_PASS); -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ } -+ -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+} -+ -+static void fx_parse_groups(struct fx_parser *parser) -+{ -+ struct fx_group -+ { -+ uint32_t name; -+ uint32_t count; -+ } group; -+ const char *name; -+ uint32_t i, j; -+ -+ if (parser->failed) -+ return; -+ -+ for (i = 0; i < parser->group_count; ++i) -+ { -+ fx_parser_read_u32s(parser, &group, sizeof(group)); -+ -+ name = fx_4_get_string(parser, group.name); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "fxgroup %s", name); -+ fx_parse_fx_4_annotations(parser); -+ -+ 
vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); -+ parse_fx_start_indent(parser); -+ -+ for (j = 0; j < group.count; ++j) -+ fx_parse_fx_4_technique(parser); -+ -+ parse_fx_end_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ } -+} -+ -+static int fx_4_parse(struct fx_parser *parser) -+{ -+ struct fx_4_header -+ { -+ uint32_t version; -+ uint32_t buffer_count; -+ uint32_t numeric_variable_count; -+ uint32_t object_count; -+ uint32_t shared_buffer_count; -+ uint32_t shared_numeric_variable_count; -+ uint32_t shared_object_count; -+ uint32_t technique_count; -+ uint32_t unstructured_size; -+ uint32_t string_count; -+ uint32_t texture_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t blend_state_count; -+ uint32_t rasterizer_state_count; -+ uint32_t sampler_state_count; -+ uint32_t rtv_count; -+ uint32_t dsv_count; -+ uint32_t shader_count; -+ uint32_t inline_shader_count; -+ } header; -+ uint32_t i; -+ -+ parser->version = 10; -+ fx_parser_read_u32s(parser, &header, sizeof(header)); -+ parser->buffer_count = header.buffer_count; -+ parser->object_count = header.object_count; -+ -+ if (parser->end - parser->ptr < header.unstructured_size) -+ { -+ parser->failed = true; -+ return -1; -+ } -+ -+ parser->unstructured.ptr = parser->ptr; -+ parser->unstructured.end = parser->ptr + header.unstructured_size; -+ parser->unstructured.size = header.unstructured_size; -+ fx_parser_skip(parser, header.unstructured_size); -+ -+ fx_parse_buffers(parser); -+ fx_4_parse_objects(parser); -+ -+ for (i = 0; i < header.technique_count; ++i) -+ fx_parse_fx_4_technique(parser); -+ -+ return parser->failed ? 
- 1 : 0; -+} -+ -+static int fx_5_parse(struct fx_parser *parser) -+{ -+ struct fx_5_header -+ { -+ uint32_t version; -+ uint32_t buffer_count; -+ uint32_t numeric_variable_count; -+ uint32_t object_count; -+ uint32_t shared_buffer_count; -+ uint32_t shared_numeric_variable_count; -+ uint32_t shared_object_count; -+ uint32_t technique_count; -+ uint32_t unstructured_size; -+ uint32_t string_count; -+ uint32_t texture_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t blend_state_count; -+ uint32_t rasterizer_state_count; -+ uint32_t sampler_state_count; -+ uint32_t rtv_count; -+ uint32_t dsv_count; -+ uint32_t shader_count; -+ uint32_t inline_shader_count; -+ uint32_t group_count; -+ uint32_t uav_count; -+ uint32_t interface_variable_count; -+ uint32_t interface_variable_element_count; -+ uint32_t class_instance_element_count; -+ } header; -+ -+ parser->version = 11; -+ fx_parser_read_u32s(parser, &header, sizeof(header)); -+ parser->buffer_count = header.buffer_count; -+ parser->object_count = header.object_count; -+ parser->group_count = header.group_count; -+ -+ if (parser->end - parser->ptr < header.unstructured_size) -+ { -+ parser->failed = true; -+ return -1; -+ } -+ -+ parser->unstructured.ptr = parser->ptr; -+ parser->unstructured.end = parser->ptr + header.unstructured_size; -+ parser->unstructured.size = header.unstructured_size; -+ fx_parser_skip(parser, header.unstructured_size); -+ -+ fx_parse_buffers(parser); -+ fx_4_parse_objects(parser); -+ -+ fx_parse_groups(parser); -+ -+ return parser->failed ? 
- 1 : 0; -+} -+ -+int fx_parse(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct fx_parser parser = -+ { -+ .start = compile_info->source.code, -+ .ptr = compile_info->source.code, -+ .end = (uint8_t *)compile_info->source.code + compile_info->source.size, -+ .message_context = message_context, -+ }; -+ uint32_t version; -+ int ret; -+ -+ vkd3d_string_buffer_init(&parser.buffer); -+ -+ if (parser.end - parser.start < sizeof(version)) -+ return -1; -+ version = *(uint32_t *)parser.ptr; -+ -+ switch (version) -+ { -+ case 0xfeff0901: -+ ret = fx_2_parse(&parser); -+ break; -+ case 0xfeff1001: -+ case 0xfeff1011: -+ ret = fx_4_parse(&parser); -+ break; -+ case 0xfeff2001: -+ ret = fx_5_parse(&parser); -+ break; -+ default: -+ fx_parser_error(&parser, VKD3D_SHADER_ERROR_FX_INVALID_VERSION, -+ "Invalid effect binary version value 0x%08x.", version); -+ ret = -1; -+ } -+ -+ vkd3d_shader_code_from_string_buffer(out, &parser.buffer); -+ -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index d1f02ab568b..0df0e30f399 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -18,6 +18,19 @@ - - #include "vkd3d_shader_private.h" - -+struct glsl_resource_type_info -+{ -+ /* The number of coordinates needed to sample the resource type. */ -+ size_t coord_size; -+ /* Whether the resource type is an array type. */ -+ bool array; -+ /* Whether the resource type has a shadow/comparison variant. */ -+ bool shadow; -+ /* The type suffix for resource type. I.e., the "2D" part of "usampler2D" -+ * or "iimage2D". 
*/ -+ const char *type_suffix; -+}; -+ - struct glsl_src - { - struct vkd3d_string_buffer *str; -@@ -38,9 +51,26 @@ struct vkd3d_glsl_generator - struct vkd3d_shader_location location; - struct vkd3d_shader_message_context *message_context; - unsigned int indent; -+ const char *prefix; - bool failed; -+ -+ struct shader_limits -+ { -+ unsigned int input_count; -+ unsigned int output_count; -+ } limits; -+ bool interstage_input; -+ bool interstage_output; -+ -+ const struct vkd3d_shader_interface_info *interface_info; -+ const struct vkd3d_shader_descriptor_offset_info *offset_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; -+ const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - }; - -+static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *rel_addr, unsigned int offset); -+ - static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - struct vkd3d_glsl_generator *generator, - enum vkd3d_shader_error error, const char *fmt, ...) 
-@@ -53,11 +83,110 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - generator->failed = true; - } - -+static const char *shader_glsl_get_prefix(enum vkd3d_shader_type type) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ return "vs"; -+ case VKD3D_SHADER_TYPE_HULL: -+ return "hs"; -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ return "ds"; -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ return "gs"; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ return "ps"; -+ case VKD3D_SHADER_TYPE_COMPUTE: -+ return "cs"; -+ default: -+ return NULL; -+ } -+} -+ -+static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info(enum vkd3d_shader_resource_type t) -+{ -+ static const struct glsl_resource_type_info info[] = -+ { -+ {0, 0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ -+ {1, 0, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -+ {1, 0, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -+ {2, 0, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -+ {2, 0, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -+ {3, 0, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -+ {3, 0, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -+ {2, 1, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -+ {3, 1, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -+ {3, 1, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -+ {4, 1, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ -+ }; -+ -+ if (!t || t >= ARRAY_SIZE(info)) -+ return NULL; -+ -+ return &info[t]; -+} -+ -+static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(struct vkd3d_glsl_generator *gen, -+ enum vkd3d_shader_descriptor_type type, unsigned int idx, unsigned int space) -+{ -+ const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ -+ for (unsigned int i = 0; i < info->descriptor_count; ++i) -+ { -+ const struct vkd3d_shader_descriptor_info1 *d = &info->descriptors[i]; -+ -+ if (d->type == type && 
d->register_space == space && d->register_index == idx) -+ return d; -+ } -+ -+ return NULL; -+} -+ -+static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor_by_id( -+ struct vkd3d_glsl_generator *gen, enum vkd3d_shader_descriptor_type type, unsigned int id) -+{ -+ const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ -+ for (unsigned int i = 0; i < info->descriptor_count; ++i) -+ { -+ const struct vkd3d_shader_descriptor_info1 *d = &info->descriptors[i]; -+ -+ if (d->type == type && d->register_id == id) -+ return d; -+ } -+ -+ return NULL; -+} -+ - static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) - { - vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); - } - -+static void shader_glsl_print_combined_sampler_name(struct vkd3d_string_buffer *buffer, -+ struct vkd3d_glsl_generator *gen, unsigned int resource_index, -+ unsigned int resource_space, unsigned int sampler_index, unsigned int sampler_space) -+{ -+ vkd3d_string_buffer_printf(buffer, "%s_t_%u", gen->prefix, resource_index); -+ if (resource_space) -+ vkd3d_string_buffer_printf(buffer, "_%u", resource_space); -+ if (sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX) -+ { -+ vkd3d_string_buffer_printf(buffer, "_s_%u", sampler_index); -+ if (sampler_space) -+ vkd3d_string_buffer_printf(buffer, "_%u", sampler_space); -+ } -+} -+ -+static void shader_glsl_print_image_name(struct vkd3d_string_buffer *buffer, -+ struct vkd3d_glsl_generator *gen, unsigned int idx, unsigned int space) -+{ -+ vkd3d_string_buffer_printf(buffer, "%s_image_%u", gen->prefix, idx); -+ if (space) -+ vkd3d_string_buffer_printf(buffer, "_%u", space); -+} -+ - static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) - { -@@ -67,6 +196,99 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - 
vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); - break; - -+ case VKD3DSPR_INPUT: -+ if (reg->idx_count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled input register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled input register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, reg->idx[0].offset); -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ if (reg->idx_count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "%s_out[%u]", gen->prefix, reg->idx[0].offset); -+ break; -+ -+ case VKD3DSPR_DEPTHOUT: -+ if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled depth output in shader type #%x.", -+ gen->program->shader_version.type); -+ vkd3d_string_buffer_printf(buffer, "gl_FragDepth"); -+ break; -+ -+ case VKD3DSPR_IMMCONST: -+ switch (reg->dimension) -+ { -+ case VSIR_DIMENSION_SCALAR: -+ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); -+ break; -+ -+ case VSIR_DIMENSION_VEC4: -+ 
vkd3d_string_buffer_printf(buffer, "uvec4(%#xu, %#xu, %#xu, %#xu)", -+ reg->u.immconst_u32[0], reg->u.immconst_u32[1], -+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled dimension %#x.", reg->dimension); -+ break; -+ } -+ break; -+ -+ case VKD3DSPR_CONSTBUFFER: -+ if (reg->idx_count != 3) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled constant buffer register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]", -+ gen->prefix, reg->idx[0].offset, reg->idx[2].offset); -+ break; -+ -+ case VKD3DSPR_THREADID: -+ vkd3d_string_buffer_printf(buffer, "gl_GlobalInvocationID"); -+ break; -+ -+ case VKD3DSPR_IDXTEMP: -+ vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); -+ shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); -+ break; -+ - default: - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled register type %#x.", reg->type); -@@ -106,23 +328,118 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca - vkd3d_string_buffer_release(cache, src->str); - } - --static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, -- const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) -+static void 
shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vkd3d_glsl_generator *gen, -+ const char *src, enum vkd3d_data_type dst_data_type, enum vkd3d_data_type src_data_type, unsigned int size) -+{ -+ if (dst_data_type == VKD3D_DATA_UNORM || dst_data_type == VKD3D_DATA_SNORM) -+ dst_data_type = VKD3D_DATA_FLOAT; -+ if (src_data_type == VKD3D_DATA_UNORM || src_data_type == VKD3D_DATA_SNORM) -+ src_data_type = VKD3D_DATA_FLOAT; -+ -+ if (dst_data_type == src_data_type) -+ { -+ vkd3d_string_buffer_printf(dst, "%s", src); -+ return; -+ } -+ -+ if (src_data_type == VKD3D_DATA_FLOAT) -+ { -+ switch (dst_data_type) -+ { -+ case VKD3D_DATA_INT: -+ vkd3d_string_buffer_printf(dst, "floatBitsToInt(%s)", src); -+ return; -+ case VKD3D_DATA_UINT: -+ vkd3d_string_buffer_printf(dst, "floatBitsToUint(%s)", src); -+ return; -+ default: -+ break; -+ } -+ } -+ -+ if (src_data_type == VKD3D_DATA_UINT) -+ { -+ switch (dst_data_type) -+ { -+ case VKD3D_DATA_FLOAT: -+ vkd3d_string_buffer_printf(dst, "uintBitsToFloat(%s)", src); -+ return; -+ case VKD3D_DATA_INT: -+ if (size == 1) -+ vkd3d_string_buffer_printf(dst, "int(%s)", src); -+ else -+ vkd3d_string_buffer_printf(dst, "ivec%u(%s)", size, src); -+ return; -+ default: -+ break; -+ } -+ } -+ -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled bitcast from %#x to %#x.", -+ src_data_type, dst_data_type); -+ vkd3d_string_buffer_printf(dst, "%s", src); -+} -+ -+static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vkd3d_data_type data_type) - { - const struct vkd3d_shader_register *reg = &vsir_src->reg; -+ struct vkd3d_string_buffer *register_name, *str; -+ enum vkd3d_data_type src_data_type; -+ unsigned int size; - -- glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); -+ register_name = vkd3d_string_buffer_get(&gen->string_buffers); - - if 
(reg->non_uniform) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled 'non-uniform' modifier."); -- if (vsir_src->modifiers) -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - -- shader_glsl_print_register_name(glsl_src->str, gen, reg); -+ if (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_THREADID) -+ src_data_type = VKD3D_DATA_UINT; -+ else -+ src_data_type = VKD3D_DATA_FLOAT; -+ -+ shader_glsl_print_register_name(register_name, gen, reg); -+ -+ if (!vsir_src->modifiers) -+ str = buffer; -+ else -+ str = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ size = reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1; -+ shader_glsl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, size); - if (reg->dimension == VSIR_DIMENSION_VEC4) -- shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); -+ shader_glsl_print_swizzle(str, vsir_src->swizzle, mask); -+ -+ switch (vsir_src->modifiers) -+ { -+ case VKD3DSPSM_NONE: -+ break; -+ case VKD3DSPSM_NEG: -+ vkd3d_string_buffer_printf(buffer, "-%s", str->buffer); -+ break; -+ case VKD3DSPSM_ABS: -+ vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled modifier %#x>(%s)", -+ vsir_src->modifiers, str->buffer); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); -+ break; -+ } -+ -+ if (str != buffer) -+ vkd3d_string_buffer_release(&gen->string_buffers, str); -+ vkd3d_string_buffer_release(&gen->string_buffers, register_name); -+} -+ -+static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) -+{ -+ glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); -+ 
shader_glsl_print_src(glsl_src->str, gen, vsir_src, mask, vsir_src->reg.data_type); - } - - static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) -@@ -153,26 +470,89 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener - return write_mask; - } - --static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( -- struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) -+static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *rel_addr, unsigned int offset) - { -- va_list args; -+ struct glsl_src r; -+ -+ if (!rel_addr) -+ { -+ vkd3d_string_buffer_printf(buffer, "[%u]", offset); -+ return; -+ } -+ -+ glsl_src_init(&r, gen, rel_addr, VKD3DSP_WRITEMASK_0); -+ vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer); -+ if (offset) -+ vkd3d_string_buffer_printf(buffer, " + %u", offset); -+ else -+ vkd3d_string_buffer_printf(buffer, "]"); -+ glsl_src_cleanup(&r, &gen->string_buffers); -+} -+ -+static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_glsl_generator *gen, -+ struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, va_list args) -+{ -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ uint32_t modifiers = dst->vsir->modifiers; -+ bool close = true; - - if (dst->vsir->shift) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); -- if (dst->vsir->modifiers) -+ if (modifiers & ~VKD3DSPDM_SATURATE) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); -+ "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers); - -- shader_glsl_print_indent(gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(gen->buffer, "%s%s 
= ", dst->register_name->buffer, dst->mask->buffer); -+ shader_glsl_print_indent(buffer, gen->indent); -+ vkd3d_string_buffer_printf(buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); -+ if (modifiers & VKD3DSPDM_SATURATE) -+ vkd3d_string_buffer_printf(buffer, "clamp("); -+ -+ switch (data_type) -+ { -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled destination register data type %#x.", data_type); -+ /* fall through */ -+ case VKD3D_DATA_FLOAT: -+ close = false; -+ break; -+ case VKD3D_DATA_INT: -+ vkd3d_string_buffer_printf(buffer, "intBitsToFloat("); -+ break; -+ case VKD3D_DATA_UINT: -+ vkd3d_string_buffer_printf(buffer, "uintBitsToFloat("); -+ break; -+ } -+ -+ vkd3d_string_buffer_vprintf(buffer, format, args); -+ -+ if (close) -+ vkd3d_string_buffer_printf(buffer, ")"); -+ if (modifiers & VKD3DSPDM_SATURATE) -+ vkd3d_string_buffer_printf(buffer, ", 0.0, 1.0)"); -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( -+ struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) -+{ -+ va_list args; - - va_start(args, format); -- vkd3d_string_buffer_vprintf(gen->buffer, format, args); -+ shader_glsl_vprint_assignment(gen, dst, dst->vsir->reg.data_type, format, args); - va_end(args); -+} -+ -+static void VKD3D_PRINTF_FUNC(4, 5) shader_glsl_print_assignment_ext(struct vkd3d_glsl_generator *gen, -+ struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, ...) 
-+{ -+ va_list args; - -- vkd3d_string_buffer_printf(gen->buffer, ";\n"); -+ va_start(args, format); -+ shader_glsl_vprint_assignment(gen, dst, data_type, format, args); -+ va_end(args); - } - - static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -@@ -183,138 +563,1923 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct - "Internal compiler error: Unhandled instruction %#x.", ins->opcode); - } - --static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+static void shader_glsl_binop(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const char *op) - { -- struct glsl_src src; -+ struct glsl_src src[2]; - struct glsl_dst dst; - uint32_t mask; - - mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -- glsl_src_init(&src, gen, &ins->src[0], mask); -+ glsl_src_init(&src[0], gen, &ins->src[0], mask); -+ glsl_src_init(&src[1], gen, &ins->src[1], mask); - -- shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); -+ shader_glsl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); - -- glsl_src_cleanup(&src, &gen->string_buffers); -+ glsl_src_cleanup(&src[1], &gen->string_buffers); -+ glsl_src_cleanup(&src[0], &gen->string_buffers); - glsl_dst_cleanup(&dst, &gen->string_buffers); - } - --static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+static void shader_glsl_dot(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, uint32_t src_mask) - { -- const struct vkd3d_shader_version *version = &gen->program->shader_version; -+ unsigned int component_count; -+ struct glsl_src src[2]; -+ struct glsl_dst dst; -+ uint32_t dst_mask; - -- /* -- * TODO: Implement in_subroutine -- * TODO: shader_glsl_generate_shader_epilogue(generator); -- */ -- if (version->major >= 4) -- { -- 
shader_glsl_print_indent(gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(gen->buffer, "return;\n"); -- } -+ dst_mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src[0], gen, &ins->src[0], src_mask); -+ glsl_src_init(&src[1], gen, &ins->src[1], src_mask); -+ -+ if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1) -+ shader_glsl_print_assignment(gen, &dst, "vec%d(dot(%s, %s))", -+ component_count, src[0].str->buffer, src[1].str->buffer); -+ else -+ shader_glsl_print_assignment(gen, &dst, "dot(%s, %s)", -+ src[0].str->buffer, src[1].str->buffer); -+ -+ glsl_src_cleanup(&src[1], &gen->string_buffers); -+ glsl_src_cleanup(&src[0], &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); - } - --static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, -- const struct vkd3d_shader_instruction *ins) -+static void shader_glsl_intrinsic(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const char *op) - { -- gen->location = ins->location; -+ struct vkd3d_string_buffer *args; -+ struct glsl_src src; -+ struct glsl_dst dst; -+ unsigned int i; -+ uint32_t mask; - -- switch (ins->opcode) -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ args = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ for (i = 0; i < ins->src_count; ++i) - { -- case VKD3DSIH_DCL_INPUT: -- case VKD3DSIH_DCL_OUTPUT: -- case VKD3DSIH_DCL_OUTPUT_SIV: -- case VKD3DSIH_NOP: -- break; -- case VKD3DSIH_MOV: -- shader_glsl_mov(gen, ins); -- break; -- case VKD3DSIH_RET: -- shader_glsl_ret(gen, ins); -- break; -- default: -- shader_glsl_unhandled(gen, ins); -- break; -+ glsl_src_init(&src, gen, &ins->src[i], mask); -+ vkd3d_string_buffer_printf(args, "%s%s", i ? 
", " : "", src.str->buffer); -+ glsl_src_cleanup(&src, &gen->string_buffers); - } -+ shader_glsl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, args); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); - } - --static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) -+static void shader_glsl_relop(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const char *scalar_op, const char *vector_op) - { -- const struct vsir_program *program = gen->program; -- struct vkd3d_string_buffer *buffer = gen->buffer; -+ unsigned int mask_size; -+ struct glsl_src src[2]; -+ struct glsl_dst dst; -+ uint32_t mask; - -- if (program->temp_count) -- vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src[0], gen, &ins->src[0], mask); -+ glsl_src_init(&src[1], gen, &ins->src[1], mask); -+ -+ if ((mask_size = vsir_write_mask_component_count(mask)) > 1) -+ shader_glsl_print_assignment(gen, &dst, "uvec%u(%s(%s, %s)) * 0xffffffffu", -+ mask_size, vector_op, src[0].str->buffer, src[1].str->buffer); -+ else -+ shader_glsl_print_assignment(gen, &dst, "%s %s %s ? 
0xffffffffu : 0u", -+ src[0].str->buffer, scalar_op, src[1].str->buffer); -+ -+ glsl_src_cleanup(&src[1], &gen->string_buffers); -+ glsl_src_cleanup(&src[0], &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); - } - --static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) -+static void shader_glsl_cast(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins, -+ const char *scalar_constructor, const char *vector_constructor) - { -- const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -- struct vkd3d_string_buffer *buffer = gen->buffer; -- unsigned int i; -- void *code; -- -- MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ unsigned int component_count; -+ struct glsl_src src; -+ struct glsl_dst dst; -+ uint32_t mask; - -- vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src, gen, &ins->src[0], mask); - -- vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); -+ if ((component_count = vsir_write_mask_component_count(mask)) > 1) -+ shader_glsl_print_assignment(gen, &dst, "%s%u(%s)", -+ vector_constructor, component_count, src.str->buffer); -+ else -+ shader_glsl_print_assignment(gen, &dst, "%s(%s)", -+ scalar_constructor, src.str->buffer); - -- shader_glsl_generate_declarations(gen); -+ glsl_src_cleanup(&src, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} - -- vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); -+static void shader_glsl_end_block(struct vkd3d_glsl_generator *gen) -+{ -+ --gen->indent; -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+} - -+static void shader_glsl_begin_block(struct vkd3d_glsl_generator *gen) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "{\n"); - ++gen->indent; -- for (i = 0; i < instructions->count; ++i) -- { -- vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); -- } -+} - -- vkd3d_string_buffer_printf(buffer, "}\n"); -+static void shader_glsl_if(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const char *condition; -+ struct glsl_src src; - -- if (TRACE_ON()) -- vkd3d_string_buffer_trace(buffer); -+ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); - -- if (gen->failed) -- return VKD3D_ERROR_INVALID_SHADER; -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? 
"bool" : "!bool"; -+ vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); - -- if ((code = vkd3d_malloc(buffer->buffer_size))) -- { -- memcpy(code, buffer->buffer, buffer->content_size); -- out->size = buffer->content_size; -- out->code = code; -- } -- else return VKD3D_ERROR_OUT_OF_MEMORY; -+ glsl_src_cleanup(&src, &gen->string_buffers); - -- return VKD3D_OK; -+ shader_glsl_begin_block(gen); - } - --static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) -+static void shader_glsl_else(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -- vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); -- vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); -+ shader_glsl_end_block(gen); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "else\n"); -+ shader_glsl_begin_block(gen); - } - --static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, -- struct vsir_program *program, struct vkd3d_shader_message_context *message_context) -+static void shader_glsl_loop(struct vkd3d_glsl_generator *gen) - { -- memset(gen, 0, sizeof(*gen)); -- gen->program = program; -- vkd3d_string_buffer_cache_init(&gen->string_buffers); -- gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); -- gen->message_context = message_context; -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "for (;;)\n"); -+ shader_glsl_begin_block(gen); - } - --int glsl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -- struct vkd3d_shader_message_context *message_context) -+static void shader_glsl_break(struct vkd3d_glsl_generator *gen) - { -- struct vkd3d_glsl_generator generator; -- int ret; -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "break;\n"); -+} - -- 
if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) -- return ret; -+static void shader_glsl_continue(struct vkd3d_glsl_generator *gen) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "continue;\n"); -+} -+ -+static void shader_glsl_switch(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ -+ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "switch (%s)\n", src.str->buffer); -+ shader_glsl_begin_block(gen); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+} -+ -+static void shader_glsl_case(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ -+ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "case %s:\n", src.str->buffer); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+} -+ -+static void shader_glsl_default(struct vkd3d_glsl_generator *gen) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "default:\n"); -+} -+ -+static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset) -+{ -+ switch (offset_size) -+ { -+ case 1: -+ vkd3d_string_buffer_printf(buffer, "%d", offset->u); -+ break; -+ case 2: -+ vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Invalid texel offset size %u.", offset_size); -+ /* fall through */ -+ case 3: -+ vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", offset->u, offset->v, 
offset->w); -+ break; -+ } -+} -+ -+static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ unsigned int resource_id, resource_idx, resource_space, sample_count; -+ const struct glsl_resource_type_info *resource_type_info; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ enum vkd3d_shader_component_type sampled_type; -+ enum vkd3d_shader_resource_type resource_type; -+ struct vkd3d_string_buffer *fetch; -+ enum vkd3d_data_type data_type; -+ struct glsl_src coord; -+ struct glsl_dst dst; -+ uint32_t coord_mask; -+ -+ if (vkd3d_shader_instruction_has_texel_offset(ins)) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled texel fetch offset."); -+ -+ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Descriptor indexing is not supported."); -+ -+ resource_id = ins->src[1].reg.idx[0].offset; -+ resource_idx = ins->src[1].reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) -+ { -+ resource_type = d->resource_type; -+ resource_space = d->register_space; -+ sample_count = d->sample_count; -+ sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); -+ data_type = vkd3d_data_type_from_component_type(sampled_type); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared resource descriptor %u.", resource_id); -+ resource_space = 0; -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ sample_count = 1; -+ data_type = VKD3D_DATA_FLOAT; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ 
"Internal compiler error: Unhandled resource type %#x.", resource_type); -+ coord_mask = vkd3d_write_mask_from_component_count(2); -+ } -+ -+ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&coord, gen, &ins->src[0], coord_mask); -+ fetch = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ vkd3d_string_buffer_printf(fetch, "texelFetch("); -+ shader_glsl_print_combined_sampler_name(fetch, gen, resource_idx, -+ resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0); -+ vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer); -+ if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER) -+ { -+ vkd3d_string_buffer_printf(fetch, ", "); -+ if (ins->opcode != VKD3DSIH_LD2DMS) -+ shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type); -+ else if (sample_count == 1) -+ /* If the resource isn't a true multisample resource, this is the -+ * "lod" parameter instead of the "sample" parameter. */ -+ vkd3d_string_buffer_printf(fetch, "0"); -+ else -+ shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type); -+ } -+ vkd3d_string_buffer_printf(fetch, ")"); -+ shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask); -+ -+ shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, fetch); -+ glsl_src_cleanup(&coord, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *coord, const struct vkd3d_shader_src_param *ref, unsigned int coord_size) -+{ -+ uint32_t coord_mask = vkd3d_write_mask_from_component_count(coord_size); -+ -+ switch (coord_size) -+ { -+ case 1: -+ vkd3d_string_buffer_printf(buffer, "vec3("); -+ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); -+ 
vkd3d_string_buffer_printf(buffer, ", 0.0, "); -+ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ")"); -+ break; -+ -+ case 4: -+ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ", "); -+ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "vec%u(", coord_size + 1); -+ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ", "); -+ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ")"); -+ break; -+ } -+} -+ -+static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ bool shadow_sampler, array, bias, dynamic_offset, gather, grad, lod, lod_zero, offset, shadow; -+ const struct glsl_resource_type_info *resource_type_info; -+ const struct vkd3d_shader_src_param *resource, *sampler; -+ unsigned int resource_id, resource_idx, resource_space; -+ unsigned int sampler_id, sampler_idx, sampler_space; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ enum vkd3d_shader_component_type sampled_type; -+ enum vkd3d_shader_resource_type resource_type; -+ unsigned int component_idx, coord_size; -+ struct vkd3d_string_buffer *sample; -+ enum vkd3d_data_type data_type; -+ struct glsl_dst dst; -+ -+ bias = ins->opcode == VKD3DSIH_SAMPLE_B; -+ dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO; -+ gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO; -+ grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; -+ lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; -+ lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; -+ offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins); -+ shadow = ins->opcode == 
VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; -+ -+ resource = &ins->src[1 + dynamic_offset]; -+ sampler = &ins->src[2 + dynamic_offset]; -+ -+ if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr -+ || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Descriptor indexing is not supported."); -+ -+ resource_id = resource->reg.idx[0].offset; -+ resource_idx = resource->reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) -+ { -+ resource_type = d->resource_type; -+ resource_space = d->register_space; -+ sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); -+ data_type = vkd3d_data_type_from_component_type(sampled_type); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared resource descriptor %u.", resource_id); -+ resource_space = 0; -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ data_type = VKD3D_DATA_FLOAT; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ coord_size = resource_type_info->coord_size; -+ array = resource_type_info->array; -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled resource type %#x.", resource_type); -+ coord_size = 2; -+ array = false; -+ } -+ -+ sampler_id = sampler->reg.idx[0].offset; -+ sampler_idx = sampler->reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) -+ { -+ sampler_space = d->register_space; -+ shadow_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; -+ -+ if (shadow) -+ { -+ if (!shadow_sampler) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Sampler %u is not a comparison 
sampler.", sampler_id); -+ } -+ else -+ { -+ if (shadow_sampler) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); -+ } -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared sampler descriptor %u.", sampler_id); -+ sampler_space = 0; -+ } -+ -+ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ sample = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ if (gather) -+ vkd3d_string_buffer_printf(sample, "textureGather"); -+ else if (grad) -+ vkd3d_string_buffer_printf(sample, "textureGrad"); -+ else if (lod) -+ vkd3d_string_buffer_printf(sample, "textureLod"); -+ else -+ vkd3d_string_buffer_printf(sample, "texture"); -+ vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : ""); -+ shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); -+ vkd3d_string_buffer_printf(sample, ", "); -+ if (shadow) -+ shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size); -+ else -+ shader_glsl_print_src(sample, gen, &ins->src[0], -+ vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type); -+ if (grad) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[3], -+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[3].reg.data_type); -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[4], -+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[4].reg.data_type); -+ } -+ else if (lod_zero) -+ { -+ vkd3d_string_buffer_printf(sample, ", 0.0"); -+ } -+ else if (lod) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); -+ } -+ if (offset) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ if 
(dynamic_offset) -+ shader_glsl_print_src(sample, gen, &ins->src[1], -+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type); -+ else -+ shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset); -+ } -+ if (bias) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); -+ } -+ else if (gather) -+ { -+ if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 0))) -+ vkd3d_string_buffer_printf(sample, ", %d", component_idx); -+ } -+ vkd3d_string_buffer_printf(sample, ")"); -+ shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask); -+ -+ shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, sample); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_load_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct glsl_resource_type_info *resource_type_info; -+ enum vkd3d_shader_component_type component_type; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ enum vkd3d_shader_resource_type resource_type; -+ unsigned int uav_id, uav_idx, uav_space; -+ struct vkd3d_string_buffer *load; -+ struct glsl_src coord; -+ struct glsl_dst dst; -+ uint32_t coord_mask; -+ -+ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Descriptor indexing is not supported."); -+ -+ uav_id = ins->src[1].reg.idx[0].offset; -+ uav_idx = ins->src[1].reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) -+ { -+ resource_type = d->resource_type; -+ uav_space = d->register_space; -+ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); -+ } -+ else -+ { -+ 
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); -+ uav_space = 0; -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled UAV type %#x.", resource_type); -+ coord_mask = vkd3d_write_mask_from_component_count(2); -+ } -+ -+ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&coord, gen, &ins->src[0], coord_mask); -+ load = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ vkd3d_string_buffer_printf(load, "imageLoad("); -+ shader_glsl_print_image_name(load, gen, uav_idx, uav_space); -+ vkd3d_string_buffer_printf(load, ", %s)", coord.str->buffer); -+ shader_glsl_print_swizzle(load, ins->src[1].swizzle, ins->dst[0].write_mask); -+ -+ shader_glsl_print_assignment_ext(gen, &dst, -+ vkd3d_data_type_from_component_type(component_type), "%s", load->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, load); -+ glsl_src_cleanup(&coord, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_store_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct glsl_resource_type_info *resource_type_info; -+ enum vkd3d_shader_component_type component_type; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ enum vkd3d_shader_resource_type resource_type; -+ unsigned int uav_id, uav_idx, uav_space; -+ struct vkd3d_string_buffer *image_data; -+ struct glsl_src image_coord; -+ uint32_t coord_mask; -+ -+ if (ins->dst[0].reg.idx[0].rel_addr || ins->dst[0].reg.idx[1].rel_addr) -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Descriptor indexing is not supported."); -+ -+ uav_id = ins->dst[0].reg.idx[0].offset; -+ uav_idx = ins->dst[0].reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) -+ { -+ resource_type = d->resource_type; -+ uav_space = d->register_space; -+ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); -+ uav_space = 0; -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled UAV type %#x.", resource_type); -+ coord_mask = vkd3d_write_mask_from_component_count(2); -+ } -+ -+ glsl_src_init(&image_coord, gen, &ins->src[0], coord_mask); -+ image_data = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) -+ { -+ switch (component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(image_data, "uvec4("); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(image_data, "ivec4("); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", component_type); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(image_data, "vec4("); -+ break; -+ } -+ } -+ shader_glsl_print_src(image_data, gen, &ins->src[1], VKD3DSP_WRITEMASK_ALL, -+ vkd3d_data_type_from_component_type(component_type)); -+ if (ins->src[1].reg.dimension == 
VSIR_DIMENSION_SCALAR) -+ vkd3d_string_buffer_printf(image_data, ", 0, 0, 0)"); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "imageStore("); -+ shader_glsl_print_image_name(gen->buffer, gen, uav_idx, uav_space); -+ vkd3d_string_buffer_printf(gen->buffer, ", %s, %s);\n", image_coord.str->buffer, image_data->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, image_data); -+ glsl_src_cleanup(&image_coord, &gen->string_buffers); -+} -+ -+static void shader_glsl_unary_op(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const char *op) -+{ -+ struct glsl_src src; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src, gen, &ins->src[0], mask); -+ -+ shader_glsl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src, gen, &ins->src[0], mask); -+ -+ shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_movc(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ unsigned int component_count; -+ struct glsl_src src[3]; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src[0], gen, &ins->src[0], mask); -+ glsl_src_init(&src[1], gen, &ins->src[1], mask); -+ glsl_src_init(&src[2], gen, &ins->src[2], mask); -+ -+ if ((component_count = vsir_write_mask_component_count(mask)) > 1) -+ 
shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bvec%u(%s))", -+ src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); -+ else -+ shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bool(%s))", -+ src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); -+ -+ glsl_src_cleanup(&src[2], &gen->string_buffers); -+ glsl_src_cleanup(&src[1], &gen->string_buffers); -+ glsl_src_cleanup(&src[0], &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_mul_extended(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src[2]; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ if (ins->dst[0].reg.type != VKD3DSPR_NULL) -+ { -+ /* FIXME: imulExtended()/umulExtended() from ARB_gpu_shader5/GLSL 4.00+. */ -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ shader_glsl_print_assignment(gen, &dst, "<unhandled 64-bit multiplication>"); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+ -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 64-bit integer multiplication."); -+ } -+ -+ if (ins->dst[1].reg.type != VKD3DSPR_NULL) -+ { -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[1]); -+ glsl_src_init(&src[0], gen, &ins->src[0], mask); -+ glsl_src_init(&src[1], gen, &ins->src[1], mask); -+ -+ shader_glsl_print_assignment(gen, &dst, "%s * %s", src[0].str->buffer, src[1].str->buffer); -+ -+ glsl_src_cleanup(&src[1], &gen->string_buffers); -+ glsl_src_cleanup(&src[0], &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+ } -+} -+ -+static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ enum vkd3d_shader_sysval_semantic sysval, unsigned int idx) -+{ -+ const struct vkd3d_shader_version *version = &gen->program->shader_version; -+ -+ switch (sysval) -+ { -+ case VKD3D_SHADER_SV_POSITION: -+ if (version->type == 
VKD3D_SHADER_TYPE_COMPUTE) -+ { -+ vkd3d_string_buffer_printf(buffer, "<unhandled sysval %#x>", sysval); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_POSITION in shader type #%x.", version->type); -+ break; -+ } -+ if (idx) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_POSITION index %u.", idx); -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); -+ else -+ vkd3d_string_buffer_printf(buffer, "gl_Position"); -+ break; -+ -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ if (version->type != VKD3D_SHADER_TYPE_VERTEX) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_VERTEX_ID in shader type #%x.", version->type); -+ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_VertexID, 0, 0, 0))"); -+ break; -+ -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: -+ if (version->type != VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type); -+ vkd3d_string_buffer_printf(buffer, -+ "uintBitsToFloat(uvec4(gl_FrontFacing ? 
0xffffffffu : 0u, 0u, 0u, 0u))"); -+ break; -+ -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ if (version->type != VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type); -+ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))"); -+ break; -+ -+ case VKD3D_SHADER_SV_TARGET: -+ if (version->type != VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_TARGET in shader type #%x.", version->type); -+ vkd3d_string_buffer_printf(buffer, "shader_out_%u", idx); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled sysval %#x>", sysval); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x.", sysval); -+ break; -+ } -+} -+ -+static void shader_glsl_shader_prologue(struct vkd3d_glsl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->input_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ shader_glsl_print_indent(buffer, gen->indent); -+ vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, e->register_index); -+ shader_glsl_print_write_mask(buffer, e->mask); -+ if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) -+ { -+ if (gen->interstage_input) -+ { -+ vkd3d_string_buffer_printf(buffer, " = shader_in.reg_%u", e->target_location); -+ if (e->target_location >= gen->limits.input_count) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Input element %u specifies target location %u, " -+ "but only %u inputs are supported.", -+ i, 
e->target_location, gen->limits.input_count); -+ } -+ else -+ { -+ switch (e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, " = uintBitsToFloat(shader_in_%u)", i); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, " = intBitsToFloat(shader_in_%u)", i); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled input component type %#x.", e->component_type); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); -+ break; -+ } -+ } -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, " = "); -+ shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); -+ } -+ shader_glsl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+} -+ -+static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->output_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ enum vkd3d_shader_component_type type; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ type = e->component_type; -+ shader_glsl_print_indent(buffer, gen->indent); -+ if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) -+ { -+ if (gen->interstage_output) -+ { -+ type = VKD3D_SHADER_COMPONENT_FLOAT; -+ vkd3d_string_buffer_printf(buffer, "shader_out.reg_%u", e->target_location); -+ if (e->target_location >= gen->limits.output_count) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Output element %u specifies target location %u, " -+ "but only %u outputs are supported.", -+ i, e->target_location, gen->limits.output_count); -+ } -+ 
else -+ { -+ vkd3d_string_buffer_printf(buffer, "<unhandled output %u>", e->target_location); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output."); -+ } -+ } -+ else -+ { -+ shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); -+ } -+ shader_glsl_print_write_mask(buffer, e->mask); -+ switch (type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output component type %#x.", e->component_type); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); -+ break; -+ } -+ shader_glsl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+} -+ -+static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct vkd3d_shader_version *version = &gen->program->shader_version; -+ -+ if (version->major >= 4) -+ { -+ shader_glsl_shader_epilogue(gen); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "return;\n"); -+ } -+} -+ -+static void shader_glsl_dcl_indexable_temp(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "vec4 x%u[%u];\n", -+ ins->declaration.indexable_temp.register_idx, -+ ins->declaration.indexable_temp.register_size); -+} -+ -+static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction 
*ins) -+{ -+ gen->location = ins->location; -+ -+ switch (ins->opcode) -+ { -+ case VKD3DSIH_ADD: -+ case VKD3DSIH_IADD: -+ shader_glsl_binop(gen, ins, "+"); -+ break; -+ case VKD3DSIH_AND: -+ shader_glsl_binop(gen, ins, "&"); -+ break; -+ case VKD3DSIH_BREAK: -+ shader_glsl_break(gen); -+ break; -+ case VKD3DSIH_CASE: -+ shader_glsl_case(gen, ins); -+ break; -+ case VKD3DSIH_CONTINUE: -+ shader_glsl_continue(gen); -+ break; -+ case VKD3DSIH_DCL_INDEXABLE_TEMP: -+ shader_glsl_dcl_indexable_temp(gen, ins); -+ break; -+ case VKD3DSIH_DCL_INPUT: -+ case VKD3DSIH_DCL_INPUT_PS: -+ case VKD3DSIH_DCL_INPUT_PS_SGV: -+ case VKD3DSIH_DCL_INPUT_PS_SIV: -+ case VKD3DSIH_DCL_INPUT_SGV: -+ case VKD3DSIH_DCL_OUTPUT: -+ case VKD3DSIH_DCL_OUTPUT_SIV: -+ case VKD3DSIH_NOP: -+ break; -+ case VKD3DSIH_DEFAULT: -+ shader_glsl_default(gen); -+ break; -+ case VKD3DSIH_DIV: -+ shader_glsl_binop(gen, ins, "/"); -+ break; -+ case VKD3DSIH_DP2: -+ shader_glsl_dot(gen, ins, vkd3d_write_mask_from_component_count(2)); -+ break; -+ case VKD3DSIH_DP3: -+ shader_glsl_dot(gen, ins, vkd3d_write_mask_from_component_count(3)); -+ break; -+ case VKD3DSIH_DP4: -+ shader_glsl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); -+ break; -+ case VKD3DSIH_ELSE: -+ shader_glsl_else(gen, ins); -+ break; -+ case VKD3DSIH_ENDIF: -+ case VKD3DSIH_ENDLOOP: -+ case VKD3DSIH_ENDSWITCH: -+ shader_glsl_end_block(gen); -+ break; -+ case VKD3DSIH_EQO: -+ case VKD3DSIH_IEQ: -+ shader_glsl_relop(gen, ins, "==", "equal"); -+ break; -+ case VKD3DSIH_EXP: -+ shader_glsl_intrinsic(gen, ins, "exp2"); -+ break; -+ case VKD3DSIH_FRC: -+ shader_glsl_intrinsic(gen, ins, "fract"); -+ break; -+ case VKD3DSIH_FTOI: -+ shader_glsl_cast(gen, ins, "int", "ivec"); -+ break; -+ case VKD3DSIH_FTOU: -+ shader_glsl_cast(gen, ins, "uint", "uvec"); -+ break; -+ case VKD3DSIH_GATHER4: -+ case VKD3DSIH_GATHER4_PO: -+ case VKD3DSIH_SAMPLE: -+ case VKD3DSIH_SAMPLE_B: -+ case VKD3DSIH_SAMPLE_C: -+ case VKD3DSIH_SAMPLE_C_LZ: -+ case VKD3DSIH_SAMPLE_GRAD: -+ 
case VKD3DSIH_SAMPLE_LOD: -+ shader_glsl_sample(gen, ins); -+ break; -+ case VKD3DSIH_GEO: -+ case VKD3DSIH_IGE: -+ shader_glsl_relop(gen, ins, ">=", "greaterThanEqual"); -+ break; -+ case VKD3DSIH_IF: -+ shader_glsl_if(gen, ins); -+ break; -+ case VKD3DSIH_MAD: -+ shader_glsl_intrinsic(gen, ins, "fma"); -+ break; -+ case VKD3DSIH_ILT: -+ case VKD3DSIH_LTO: -+ case VKD3DSIH_ULT: -+ shader_glsl_relop(gen, ins, "<", "lessThan"); -+ break; -+ case VKD3DSIH_IMAX: -+ case VKD3DSIH_MAX: -+ case VKD3DSIH_UMAX: -+ shader_glsl_intrinsic(gen, ins, "max"); -+ break; -+ case VKD3DSIH_MIN: -+ case VKD3DSIH_UMIN: -+ shader_glsl_intrinsic(gen, ins, "min"); -+ break; -+ case VKD3DSIH_IMUL: -+ shader_glsl_mul_extended(gen, ins); -+ break; -+ case VKD3DSIH_INE: -+ case VKD3DSIH_NEU: -+ shader_glsl_relop(gen, ins, "!=", "notEqual"); -+ break; -+ case VKD3DSIH_INEG: -+ shader_glsl_unary_op(gen, ins, "-"); -+ break; -+ case VKD3DSIH_ISHL: -+ shader_glsl_binop(gen, ins, "<<"); -+ break; -+ case VKD3DSIH_ISHR: -+ case VKD3DSIH_USHR: -+ shader_glsl_binop(gen, ins, ">>"); -+ break; -+ case VKD3DSIH_ITOF: -+ case VKD3DSIH_UTOF: -+ shader_glsl_cast(gen, ins, "float", "vec"); -+ break; -+ case VKD3DSIH_LD: -+ case VKD3DSIH_LD2DMS: -+ shader_glsl_ld(gen, ins); -+ break; -+ case VKD3DSIH_LD_UAV_TYPED: -+ shader_glsl_load_uav_typed(gen, ins); -+ break; -+ case VKD3DSIH_LOG: -+ shader_glsl_intrinsic(gen, ins, "log2"); -+ break; -+ case VKD3DSIH_LOOP: -+ shader_glsl_loop(gen); -+ break; -+ case VKD3DSIH_MOV: -+ shader_glsl_mov(gen, ins); -+ break; -+ case VKD3DSIH_MOVC: -+ shader_glsl_movc(gen, ins); -+ break; -+ case VKD3DSIH_MUL: -+ shader_glsl_binop(gen, ins, "*"); -+ break; -+ case VKD3DSIH_NOT: -+ shader_glsl_unary_op(gen, ins, "~"); -+ break; -+ case VKD3DSIH_OR: -+ shader_glsl_binop(gen, ins, "|"); -+ break; -+ case VKD3DSIH_RET: -+ shader_glsl_ret(gen, ins); -+ break; -+ case VKD3DSIH_ROUND_NE: -+ shader_glsl_intrinsic(gen, ins, "roundEven"); -+ break; -+ case VKD3DSIH_ROUND_NI: -+ 
shader_glsl_intrinsic(gen, ins, "floor"); -+ break; -+ case VKD3DSIH_ROUND_PI: -+ shader_glsl_intrinsic(gen, ins, "ceil"); -+ break; -+ case VKD3DSIH_ROUND_Z: -+ shader_glsl_intrinsic(gen, ins, "trunc"); -+ break; -+ case VKD3DSIH_RSQ: -+ shader_glsl_intrinsic(gen, ins, "inversesqrt"); -+ break; -+ case VKD3DSIH_SQRT: -+ shader_glsl_intrinsic(gen, ins, "sqrt"); -+ break; -+ case VKD3DSIH_STORE_UAV_TYPED: -+ shader_glsl_store_uav_typed(gen, ins); -+ break; -+ case VKD3DSIH_SWITCH: -+ shader_glsl_switch(gen, ins); -+ break; -+ default: -+ shader_glsl_unhandled(gen, ins); -+ break; -+ } -+} -+ -+static bool shader_glsl_check_shader_visibility(const struct vkd3d_glsl_generator *gen, -+ enum vkd3d_shader_visibility visibility) -+{ -+ enum vkd3d_shader_type t = gen->program->shader_version.type; -+ -+ switch (visibility) -+ { -+ case VKD3D_SHADER_VISIBILITY_ALL: -+ return true; -+ case VKD3D_SHADER_VISIBILITY_VERTEX: -+ return t == VKD3D_SHADER_TYPE_VERTEX; -+ case VKD3D_SHADER_VISIBILITY_HULL: -+ return t == VKD3D_SHADER_TYPE_HULL; -+ case VKD3D_SHADER_VISIBILITY_DOMAIN: -+ return t == VKD3D_SHADER_TYPE_DOMAIN; -+ case VKD3D_SHADER_VISIBILITY_GEOMETRY: -+ return t == VKD3D_SHADER_TYPE_GEOMETRY; -+ case VKD3D_SHADER_VISIBILITY_PIXEL: -+ return t == VKD3D_SHADER_TYPE_PIXEL; -+ case VKD3D_SHADER_VISIBILITY_COMPUTE: -+ return t == VKD3D_SHADER_TYPE_COMPUTE; -+ default: -+ WARN("Invalid shader visibility %#x.\n", visibility); -+ return false; -+ } -+} -+ -+static bool shader_glsl_get_uav_binding(const struct vkd3d_glsl_generator *gen, unsigned int register_space, -+ unsigned int register_idx, enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx) -+{ -+ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; -+ const struct vkd3d_shader_resource_binding *binding; -+ enum vkd3d_shader_binding_flag resource_type_flag; -+ unsigned int i; -+ -+ if (!interface_info) -+ return false; -+ -+ resource_type_flag = resource_type == 
VKD3D_SHADER_RESOURCE_BUFFER -+ ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; -+ -+ for (i = 0; i < interface_info->binding_count; ++i) -+ { -+ binding = &interface_info->bindings[i]; -+ -+ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ continue; -+ if (binding->register_space != register_space) -+ continue; -+ if (binding->register_index != register_idx) -+ continue; -+ if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) -+ continue; -+ if (!(binding->flags & resource_type_flag)) -+ continue; -+ *binding_idx = i; -+ return true; -+ } -+ -+ return false; -+} -+ -+static void shader_glsl_generate_uav_declaration(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_descriptor_info1 *uav) -+{ -+ const struct glsl_resource_type_info *resource_type_info; -+ const char *image_type_prefix, *image_type, *read_format; -+ const struct vkd3d_shader_descriptor_binding *binding; -+ const struct vkd3d_shader_descriptor_offset *offset; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ enum vkd3d_shader_component_type component_type; -+ unsigned int binding_idx; -+ -+ if (uav->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "UAV %u has unsupported descriptor array size %u.", uav->register_id, uav->count); -+ return; -+ } -+ -+ if (!shader_glsl_get_uav_binding(gen, uav->register_space, -+ uav->register_index, uav->resource_type, &binding_idx)) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "No descriptor binding specified for UAV %u.", uav->register_id); -+ return; -+ } -+ -+ binding = &gen->interface_info->bindings[binding_idx].binding; -+ -+ if (binding->set != 0) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding set %u specified for UAV %u.", binding->set, uav->register_id); -+ return; -+ } -+ -+ if (binding->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding count %u specified for UAV %u.", binding->count, uav->register_id); -+ return; -+ } -+ -+ if (gen->offset_info && gen->offset_info->binding_offsets) -+ { -+ offset = &gen->offset_info->binding_offsets[binding_idx]; -+ if (offset->static_offset || offset->dynamic_offset_index != ~0u) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled descriptor offset specified for UAV %u.", -+ uav->register_id); -+ return; -+ } -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(uav->resource_type))) -+ { -+ image_type = resource_type_info->type_suffix; -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled UAV type %#x.", uav->resource_type); -+ image_type = "<unhandled image type>"; -+ } -+ -+ switch ((component_type = vkd3d_component_type_from_resource_data_type(uav->resource_data_type))) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ image_type_prefix = "u"; -+ read_format = "r32ui"; -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ image_type_prefix = "i"; -+ read_format = "r32i"; -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x for UAV %u.", -+ component_type, uav->register_id); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ image_type_prefix = ""; -+ read_format = "r32f"; -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "layout(binding = %u", binding->binding); -+ if (uav->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ) -+ vkd3d_string_buffer_printf(buffer, ", %s) ", read_format); -+ else -+ vkd3d_string_buffer_printf(buffer, ") writeonly "); -+ vkd3d_string_buffer_printf(buffer, "uniform %simage%s ", image_type_prefix, image_type); -+ shader_glsl_print_image_name(buffer, gen, uav->register_index, uav->register_space); -+ 
vkd3d_string_buffer_printf(buffer, ";\n"); -+} -+ -+static bool shader_glsl_get_cbv_binding(const struct vkd3d_glsl_generator *gen, -+ unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) -+{ -+ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; -+ const struct vkd3d_shader_resource_binding *binding; -+ unsigned int i; -+ -+ if (!interface_info) -+ return false; -+ -+ for (i = 0; i < interface_info->binding_count; ++i) -+ { -+ binding = &interface_info->bindings[i]; -+ -+ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) -+ continue; -+ if (binding->register_space != register_space) -+ continue; -+ if (binding->register_index != register_idx) -+ continue; -+ if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) -+ continue; -+ if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) -+ continue; -+ *binding_idx = i; -+ return true; -+ } -+ -+ return false; -+} -+ -+static void shader_glsl_generate_cbv_declaration(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_descriptor_info1 *cbv) -+{ -+ const struct vkd3d_shader_descriptor_binding *binding; -+ const struct vkd3d_shader_descriptor_offset *offset; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const char *prefix = gen->prefix; -+ unsigned int binding_idx; -+ size_t size; -+ -+ if (cbv->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); -+ return; -+ } -+ -+ if (!shader_glsl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "No descriptor binding specified for constant buffer %u.", cbv->register_id); -+ return; -+ } -+ -+ binding = &gen->interface_info->bindings[binding_idx].binding; -+ -+ if (binding->set != 0) -+ { -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); -+ return; -+ } -+ -+ if (binding->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); -+ return; -+ } -+ -+ if (gen->offset_info && gen->offset_info->binding_offsets) -+ { -+ offset = &gen->offset_info->binding_offsets[binding_idx]; -+ if (offset->static_offset || offset->dynamic_offset_index != ~0u) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled descriptor offset specified for constant buffer %u.", -+ cbv->register_id); -+ return; -+ } -+ } -+ -+ size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); -+ size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); -+ -+ vkd3d_string_buffer_printf(buffer, -+ "layout(std140, binding = %u) uniform block_%s_cb_%u { vec4 %s_cb_%u[%zu]; };\n", -+ binding->binding, prefix, cbv->register_id, prefix, cbv->register_id, size); -+} -+ -+static bool shader_glsl_get_combined_sampler_binding(const struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_combined_resource_sampler_info *crs, -+ enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx) -+{ -+ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; -+ const struct vkd3d_shader_combined_resource_sampler *s; -+ enum vkd3d_shader_binding_flag resource_type_flag; -+ unsigned int i; -+ -+ if (!interface_info) -+ return false; -+ -+ resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER -+ ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; -+ -+ for (i = 0; i < interface_info->combined_sampler_count; ++i) -+ { -+ s = &interface_info->combined_samplers[i]; -+ -+ if (s->resource_space != crs->resource_space) -+ continue; -+ if (s->resource_index != crs->resource_index) -+ continue; -+ if (crs->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX) -+ { -+ if (s->sampler_space != crs->sampler_space) -+ continue; -+ if (s->sampler_index != crs->sampler_index) -+ continue; -+ } -+ if (!shader_glsl_check_shader_visibility(gen, s->shader_visibility)) -+ continue; -+ if (!(s->flags & resource_type_flag)) -+ continue; -+ *binding_idx = i; -+ return true; -+ } -+ -+ return false; -+} -+ -+static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_combined_resource_sampler_info *crs) -+{ -+ const struct vkd3d_shader_descriptor_info1 *sampler, *srv; -+ const struct glsl_resource_type_info *resource_type_info; -+ const struct vkd3d_shader_descriptor_binding *binding; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ enum vkd3d_shader_component_type component_type; -+ const char *sampler_type, *sampler_type_prefix; -+ enum vkd3d_shader_resource_type resource_type; -+ unsigned int binding_idx; -+ bool shadow = false; -+ -+ if (crs->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX) -+ { -+ if (!(sampler = shader_glsl_get_descriptor(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, -+ crs->sampler_index, crs->sampler_space))) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: No descriptor found for sampler %u, space %u.", -+ crs->sampler_index, crs->sampler_space); -+ return; -+ } -+ shadow = sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; -+ } -+ -+ if (!(srv = shader_glsl_get_descriptor(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, -+ crs->resource_index, crs->resource_space))) -+ { -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: No descriptor found for resource %u, space %u.", -+ crs->resource_index, crs->resource_space); -+ return; -+ } -+ -+ resource_type = srv->resource_type; -+ if (srv->sample_count == 1) -+ { -+ /* The OpenGL API distinguishes between multi-sample textures with -+ * sample count 1 and single-sample textures. Direct3D and Vulkan -+ * don't make this distinction at the API level, but Direct3D shaders -+ * are capable of expressing both. We therefore map such multi-sample -+ * textures to their single-sample equivalents here. */ -+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ sampler_type = resource_type_info->type_suffix; -+ if (shadow && !resource_type_info->shadow) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Comparison samplers are not supported with resource type %#x.", resource_type); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled resource type %#x for combined resource/sampler " -+ "for resource %u, space %u and sampler %u, space %u.", resource_type, -+ crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); -+ sampler_type = "<unhandled sampler type>"; -+ } -+ -+ switch ((component_type = vkd3d_component_type_from_resource_data_type(srv->resource_data_type))) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ sampler_type_prefix = "u"; -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ sampler_type_prefix = "i"; -+ break; -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ sampler_type_prefix = ""; -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal 
compiler error: Unhandled component type %#x for combined resource/sampler " -+ "for resource %u, space %u and sampler %u, space %u.", component_type, -+ crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); -+ sampler_type_prefix = ""; -+ break; -+ } -+ -+ if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx)) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "No descriptor binding specified for combined resource/sampler " -+ "for resource %u, space %u and sampler %u, space %u.", -+ crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); -+ return; -+ } -+ -+ binding = &gen->interface_info->combined_samplers[binding_idx].binding; -+ -+ if (binding->set != 0) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding set %u specified for combined resource/sampler " -+ "for resource %u, space %u and sampler %u, space %u.", binding->set, -+ crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); -+ return; -+ } -+ -+ if (binding->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding count %u specified for combined resource/sampler " -+ "for resource %u, space %u and sampler %u, space %u.", binding->count, -+ crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); -+ return; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %ssampler%s%s ", -+ binding->binding, sampler_type_prefix, sampler_type, shadow ? 
"Shadow" : ""); -+ shader_glsl_print_combined_sampler_name(buffer, gen, crs->resource_index, -+ crs->resource_space, crs->sampler_index, crs->sampler_space); -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+} -+ -+static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_generator *gen) -+{ -+ const struct vkd3d_shader_scan_combined_resource_sampler_info *sampler_info = gen->combined_sampler_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_descriptor_info1 *descriptor; -+ unsigned int i; -+ -+ for (i = 0; i < info->descriptor_count; ++i) -+ { -+ descriptor = &info->descriptors[i]; -+ -+ switch (descriptor->type) -+ { -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: -+ /* GLSL uses combined resource/sampler descriptors.*/ -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -+ shader_glsl_generate_uav_declaration(gen, descriptor); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -+ shader_glsl_generate_cbv_declaration(gen, descriptor); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(gen->buffer, "/* <unhandled descriptor type %#x> */\n", descriptor->type); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); -+ break; -+ } -+ } -+ for (i = 0; i < sampler_info->combined_sampler_count; ++i) -+ { -+ shader_glsl_generate_sampler_declaration(gen, &sampler_info->combined_samplers[i]); -+ } -+ if (info->descriptor_count) -+ vkd3d_string_buffer_printf(gen->buffer, "\n"); -+} -+ -+static const struct signature_element *signature_get_element_by_location( -+ const struct shader_signature *signature, unsigned int location) -+{ -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location != location) -+ continue; -+ -+ return e; -+ } -+ 
-+ return NULL; -+} -+ -+static const char *shader_glsl_get_interpolation(struct vkd3d_glsl_generator *gen, -+ const struct shader_signature *signature, const char *type, unsigned int location) -+{ -+ enum vkd3d_shader_interpolation_mode m; -+ const struct signature_element *e; -+ -+ if ((e = signature_get_element_by_location(signature, location))) -+ m = e->interpolation_mode; -+ else -+ m = VKD3DSIM_NONE; -+ -+ switch (m) -+ { -+ case VKD3DSIM_NONE: -+ case VKD3DSIM_LINEAR: -+ return ""; -+ case VKD3DSIM_CONSTANT: -+ return "flat "; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x for %s location %u.", m, type, location); -+ return ""; -+ } -+} -+ -+static void shader_glsl_generate_interface_block(struct vkd3d_glsl_generator *gen, -+ const struct shader_signature *signature, const char *type, unsigned int count) -+{ -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const char *interpolation; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "%s shader_in_out\n{\n", type); -+ for (i = 0; i < count; ++i) -+ { -+ interpolation = shader_glsl_get_interpolation(gen, signature, type, i); -+ vkd3d_string_buffer_printf(buffer, " %svec4 reg_%u;\n", interpolation, i); -+ } -+ vkd3d_string_buffer_printf(buffer, "} shader_%s;\n", type); -+} -+ -+static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->input_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i, count; -+ -+ if (!gen->interstage_input) -+ { -+ for (i = 0, count = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED || e->sysval_semantic) -+ continue; -+ -+ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) -+ { -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); -+ continue; -+ } -+ -+ if (e->interpolation_mode != VKD3DSIM_NONE) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -+ continue; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "layout(location = %u) in ", e->target_location); -+ switch (e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uvec4"); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "ivec4"); -+ break; -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "vec4"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled type %#x>", e->component_type); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled input component type %#x.", e->component_type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, " shader_in_%u;\n", i); -+ ++count; -+ } -+ if (count) -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+ else if (gen->limits.input_count) -+ { -+ shader_glsl_generate_interface_block(gen, signature, "in", gen->limits.input_count); -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+} -+ -+static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->output_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i, count; -+ -+ if (!gen->interstage_output) -+ { -+ for (i = 0, count = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ if (e->sysval_semantic != VKD3D_SHADER_SV_TARGET) -+ { -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); -+ continue; -+ } -+ -+ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); -+ continue; -+ } -+ -+ if (e->interpolation_mode != VKD3DSIM_NONE) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -+ continue; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "layout(location = %u) out ", e->target_location); -+ switch (e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uvec4"); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "ivec4"); -+ break; -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "vec4"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled type %#x>", e->component_type); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output component type %#x.", e->component_type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", e->semantic_index); -+ ++count; -+ } -+ if (count) -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+ else if (gen->limits.output_count) -+ { -+ shader_glsl_generate_interface_block(gen, signature, "out", gen->limits.output_count); -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+} -+ -+static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer *buffer, -+ struct vkd3d_glsl_generator *gen, enum vsir_global_flags flags) -+{ -+ if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) -+ { -+ vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n"); -+ flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; -+ } -+ -+ if (flags) -+ 
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags); -+} -+ -+static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) -+{ -+ const struct vsir_program *program = gen->program; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct vsir_thread_group_size *group_size; -+ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) -+ { -+ group_size = &program->thread_group_size; -+ vkd3d_string_buffer_printf(buffer, "layout(local_size_x = %u, local_size_y = %u, local_size_z = %u) in;\n\n", -+ group_size->x, group_size->y, group_size->z); -+ } -+ -+ shader_glsl_handle_global_flags(buffer, gen, program->global_flags); -+ -+ shader_glsl_generate_descriptor_declarations(gen); -+ shader_glsl_generate_input_declarations(gen); -+ shader_glsl_generate_output_declarations(gen); -+ -+ if (gen->limits.input_count) -+ vkd3d_string_buffer_printf(buffer, "vec4 %s_in[%u];\n", gen->prefix, gen->limits.input_count); -+ if (gen->limits.output_count) -+ vkd3d_string_buffer_printf(buffer, "vec4 %s_out[%u];\n", gen->prefix, gen->limits.output_count); -+ if (program->temp_count) -+ vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n", program->temp_count); -+ vkd3d_string_buffer_printf(buffer, "\n"); -+} -+ -+static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) -+{ -+ const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ unsigned int i; -+ void *code; -+ -+ MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ -+ vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); -+ -+ vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); -+ -+ shader_glsl_generate_declarations(gen); -+ -+ vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); -+ -+ ++gen->indent; -+ shader_glsl_shader_prologue(gen); -+ for (i = 0; i < instructions->count; ++i) -+ { -+ vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "}\n"); -+ -+ if (TRACE_ON()) -+ vkd3d_string_buffer_trace(buffer); -+ -+ if (gen->failed) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((code = vkd3d_malloc(buffer->buffer_size))) -+ { -+ memcpy(code, buffer->buffer, buffer->content_size); -+ out->size = buffer->content_size; -+ out->code = code; -+ } -+ else return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ return VKD3D_OK; -+} -+ -+static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) -+{ -+ vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); -+ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); -+} -+ -+static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_version *version) -+{ -+ struct shader_limits *limits = &gen->limits; -+ -+ if (version->major < 4 || version->major >= 6) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled shader version %u.%u.", version->major, version->minor); -+ -+ switch (version->type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ limits->input_count = 32; -+ limits->output_count = 32; -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ limits->input_count = 32; -+ limits->output_count = 8; -+ break; -+ case VKD3D_SHADER_TYPE_COMPUTE: -+ limits->input_count = 0; -+ limits->output_count = 0; -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", version->type); -+ limits->input_count = 0; -+ limits->output_count = 0; -+ break; -+ } -+} -+ -+static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator 
*gen, -+ struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_shader_type type = program->shader_version.type; -+ -+ memset(gen, 0, sizeof(*gen)); -+ gen->program = program; -+ vkd3d_string_buffer_cache_init(&gen->string_buffers); -+ gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); -+ gen->location.source_name = compile_info->source_name; -+ gen->message_context = message_context; -+ if (!(gen->prefix = shader_glsl_get_prefix(type))) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ gen->prefix = "unknown"; -+ } -+ shader_glsl_init_limits(gen, &program->shader_version); -+ gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX && type != VKD3D_SHADER_TYPE_COMPUTE; -+ gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL && type != VKD3D_SHADER_TYPE_COMPUTE; -+ -+ gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); -+ gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); -+ gen->descriptor_info = descriptor_info; -+ gen->combined_sampler_info = combined_sampler_info; -+} -+ -+int glsl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, -+ const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_glsl_generator generator; -+ int ret; -+ -+ if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) -+ return ret; -+ -+ 
VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); - -- vkd3d_glsl_generator_init(&generator, program, message_context); -+ vkd3d_glsl_generator_init(&generator, program, compile_info, -+ descriptor_info, combined_sampler_info, message_context); - ret = vkd3d_glsl_generator_generate(&generator, out); - vkd3d_glsl_generator_cleanup(&generator); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index bd5baacd83d..3be9ba9979b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -254,6 +254,47 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) - } - } - -+bool hlsl_type_is_shader(const struct hlsl_type *type) -+{ -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ return hlsl_type_is_shader(type->e.array.type); -+ -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_VERTEX_SHADER: -+ return true; -+ -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: -+ case HLSL_CLASS_VOID: -+ case HLSL_CLASS_NULL: -+ return false; -+ } -+ return false; -+} -+ - /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or - * resources, since for both their data types span across a single regset. 
*/ - static enum hlsl_regset type_get_regset(const struct hlsl_type *type) -@@ -379,6 +420,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RASTERIZER_STATE: -@@ -393,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -455,6 +498,7 @@ static bool type_is_single_component(const struct hlsl_type *type) - { - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_SAMPLER: -@@ -483,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_STREAM_OUTPUT: - break; - } - vkd3d_unreachable(); -@@ -631,12 +676,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - break; - - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_NULL: -+ case HLSL_CLASS_STREAM_OUTPUT: - vkd3d_unreachable(); - } - type = next_type; -@@ -855,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba - return type; - } - -+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, -+ enum hlsl_so_object_type so_type, struct hlsl_type *data_type) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) -+ return NULL; -+ type->class = 
HLSL_CLASS_STREAM_OUTPUT; -+ type->e.so.so_type = so_type; -+ type->e.so.type = data_type; -+ -+ list_add_tail(&ctx->types, &type->entry); -+ -+ return type; -+} -+ - struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count) - { -@@ -930,6 +993,7 @@ static const char * get_case_insensitive_typename(const char *name) - { - "dword", - "float", -+ "geometryshader", - "matrix", - "pixelshader", - "texture", -@@ -1021,6 +1085,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1041,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_STREAM_OUTPUT: - break; - } - -@@ -1112,9 +1178,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - case HLSL_CLASS_CONSTANT_BUFFER: - return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); - -+ case HLSL_CLASS_STREAM_OUTPUT: -+ if (t1->e.so.so_type != t2->e.so.so_type) -+ return false; -+ return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); -+ - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RASTERIZER_STATE: -@@ -1575,7 +1647,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - -- VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -@@ -1589,6 +1660,16 @@ struct hlsl_ir_node 
*hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -+static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx) -+{ -+ static const struct vkd3d_shader_location loc = {.source_name = "<error>"}; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ -+ /* Use a dummy location; we should never report any messages related to -+ * this expression. */ -+ return hlsl_new_expr(ctx, HLSL_OP0_ERROR, operands, ctx->builtin_types.error, &loc); -+} -+ - struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) - { -@@ -1640,6 +1721,22 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return &s->node; - } - -+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, -+ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_vsir_instruction_ref *vsir_instr; -+ -+ if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) -+ return NULL; -+ init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); -+ vsir_instr->vsir_instr_idx = vsir_instr_idx; -+ -+ if (reg) -+ vsir_instr->node.reg = *reg; -+ -+ return &vsir_instr->node; -+} -+ - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) - { -@@ -1792,6 +1889,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - return &swizzle->node; - } - -+struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, -+ const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, -+ struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc) -+{ -+ const 
struct hlsl_profile_info *profile_info = NULL; -+ struct hlsl_ir_compile *compile; -+ struct hlsl_type *type = NULL; -+ unsigned int i; -+ -+ switch (compile_type) -+ { -+ case HLSL_COMPILE_TYPE_COMPILE: -+ if (!(profile_info = hlsl_get_target_info(profile_name))) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Unknown profile \"%s\".", profile_name); -+ return NULL; -+ } -+ -+ if (profile_info->type == VKD3D_SHADER_TYPE_PIXEL) -+ type = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); -+ else if (profile_info->type == VKD3D_SHADER_TYPE_VERTEX) -+ type = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); -+ -+ if (!type) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Invalid profile \"%s\".", profile_name); -+ return NULL; -+ } -+ -+ break; -+ -+ case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: -+ type = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); -+ break; -+ } -+ -+ if (!(compile = hlsl_alloc(ctx, sizeof(*compile)))) -+ return NULL; -+ -+ init_node(&compile->node, HLSL_IR_COMPILE, type, loc); -+ -+ compile->compile_type = compile_type; -+ compile->profile = profile_info; -+ -+ hlsl_block_init(&compile->instrs); -+ hlsl_block_add_block(&compile->instrs, args_instrs); -+ -+ compile->args_count = args_count; -+ if (!(compile->args = hlsl_alloc(ctx, sizeof(*compile->args) * args_count))) -+ { -+ vkd3d_free(compile); -+ return NULL; -+ } -+ for (i = 0; i < compile->args_count; ++i) -+ hlsl_src_from_node(&compile->args[i], args[i]); -+ -+ return &compile->node; -+} -+ -+bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, -+ struct hlsl_state_block_entry *entry) -+{ -+ if (!vkd3d_array_reserve((void **)&state_block->entries, -+ &state_block->capacity, state_block->count + 1, -+ sizeof(*state_block->entries))) -+ return false; -+ -+ state_block->entries[state_block->count++] = entry; -+ return true; -+} -+ -+struct hlsl_ir_node *hlsl_new_sampler_state(struct hlsl_ctx *ctx, -+ 
const struct hlsl_state_block *state_block, struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_sampler_state *sampler_state; -+ struct hlsl_type *type = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; -+ -+ if (!(sampler_state = hlsl_alloc(ctx, sizeof(*sampler_state)))) -+ return NULL; -+ -+ init_node(&sampler_state->node, HLSL_IR_SAMPLER_STATE, type, loc); -+ -+ if (!(sampler_state->state_block = hlsl_alloc(ctx, sizeof(*sampler_state->state_block)))) -+ { -+ vkd3d_free(sampler_state); -+ return NULL; -+ } -+ -+ if (state_block) -+ { -+ for (unsigned int i = 0; i < state_block->count; ++i) -+ { -+ const struct hlsl_state_block_entry *src = state_block->entries[i]; -+ struct hlsl_state_block_entry *entry; -+ -+ if (!(entry = clone_stateblock_entry(ctx, src, src->name, src->lhs_has_index, src->lhs_index, false, 0))) -+ { -+ hlsl_free_instr(&sampler_state->node); -+ return NULL; -+ } -+ -+ if (!hlsl_state_block_add_entry(sampler_state->state_block, entry)) -+ { -+ hlsl_free_instr(&sampler_state->node); -+ return NULL; -+ } -+ } -+ } -+ -+ return &sampler_state->node; -+} -+ - struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, - struct vkd3d_shader_location *loc) - { -@@ -2142,6 +2351,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr - return dst; - } - -+static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, -+ struct clone_instr_map *map, struct hlsl_ir_compile *compile) -+{ -+ const char *profile_name = NULL; -+ struct hlsl_ir_node **args; -+ struct hlsl_ir_node *node; -+ struct hlsl_block block; -+ unsigned int i; -+ -+ if (!(clone_block(ctx, &block, &compile->instrs, map))) -+ return NULL; -+ -+ if (!(args = hlsl_alloc(ctx, sizeof(*args) * compile->args_count))) -+ { -+ hlsl_block_cleanup(&block); -+ return NULL; -+ } -+ for (i = 0; i < compile->args_count; ++i) -+ { -+ args[i] = map_instr(map, compile->args[i].node); -+ VKD3D_ASSERT(args[i]); -+ } -+ -+ if 
(compile->profile) -+ profile_name = compile->profile->name; -+ -+ if (!(node = hlsl_new_compile(ctx, compile->compile_type, profile_name, -+ args, compile->args_count, &block, &compile->node.loc))) -+ { -+ hlsl_block_cleanup(&block); -+ vkd3d_free(args); -+ return NULL; -+ } -+ -+ vkd3d_free(args); -+ return node; -+} -+ -+static struct hlsl_ir_node *clone_sampler_state(struct hlsl_ctx *ctx, -+ struct clone_instr_map *map, struct hlsl_ir_sampler_state *sampler_state) -+{ -+ return hlsl_new_sampler_state(ctx, sampler_state->state_block, -+ &sampler_state->node.loc); -+} -+ - static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, - struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) - { -@@ -2149,8 +2403,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, - } - - struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, -- struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -- unsigned int lhs_index, unsigned int arg_index) -+ const struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -+ unsigned int lhs_index, bool single_arg, unsigned int arg_index) - { - struct hlsl_state_block_entry *entry; - struct clone_instr_map map = { 0 }; -@@ -2166,7 +2420,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, - return NULL; - } - -- entry->args_count = 1; -+ if (single_arg) -+ entry->args_count = 1; -+ else -+ entry->args_count = src->args_count; -+ - if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) - { - hlsl_free_state_block_entry(entry); -@@ -2179,7 +2437,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, - hlsl_free_state_block_entry(entry); - return NULL; - } -- clone_src(&map, entry->args, &src->args[arg_index]); -+ -+ if (single_arg) -+ { -+ clone_src(&map, entry->args, &src->args[arg_index]); -+ } -+ else -+ { -+ for (unsigned int i = 0; i < 
src->args_count; ++i) -+ clone_src(&map, &entry->args[i], &src->args[i]); -+ } - vkd3d_free(map.instrs); - - return entry; -@@ -2284,8 +2551,17 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - case HLSL_IR_SWIZZLE: - return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); - -+ case HLSL_IR_COMPILE: -+ return clone_compile(ctx, map, hlsl_ir_compile(instr)); -+ -+ case HLSL_IR_SAMPLER_STATE: -+ return clone_sampler_state(ctx, map, hlsl_ir_sampler_state(instr)); -+ - case HLSL_IR_STATEBLOCK_CONSTANT: - return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); -+ -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_unreachable(); - } - - vkd3d_unreachable(); -@@ -2314,6 +2590,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - decl->return_type = return_type; - decl->parameters = *parameters; - decl->loc = *loc; -+ list_init(&decl->extern_vars); - - if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) - { -@@ -2523,6 +2800,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - - case HLSL_CLASS_TEXTURE: -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); -+ return string; -+ } -+ - if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - vkd3d_string_buffer_printf(string, "Texture"); -@@ -2548,6 +2831,11 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - - case HLSL_CLASS_UAV: -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); -+ return string; -+ } - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - vkd3d_string_buffer_printf(string, "RWBuffer"); - else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -@@ -2570,6 +2858,24 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - return string; - -+ case 
HLSL_CLASS_ERROR: -+ vkd3d_string_buffer_printf(string, "<error type>"); -+ return string; -+ -+ case HLSL_CLASS_STREAM_OUTPUT: -+ if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) -+ vkd3d_string_buffer_printf(string, "PointStream"); -+ else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) -+ vkd3d_string_buffer_printf(string, "LineStream"); -+ else -+ vkd3d_string_buffer_printf(string, "TriangleStream"); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } -+ return string; -+ - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -@@ -2698,7 +3004,11 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - [HLSL_IR_STORE ] = "HLSL_IR_STORE", - [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", - [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", -+ -+ [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", -+ [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", - [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", -+ [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -2907,6 +3217,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - { - static const char *const op_names[] = - { -+ [HLSL_OP0_ERROR] = "error", - [HLSL_OP0_VOID] = "void", - [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", - -@@ -2924,6 +3235,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", - [HLSL_OP1_F16TOF32] = "f16tof32", -+ [HLSL_OP1_F32TOF16] = "f32tof16", - [HLSL_OP1_FLOOR] = "floor", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -@@ -3146,6 +3458,40 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ - vkd3d_string_buffer_printf(buffer, "]"); - } - -+static void dump_ir_compile(struct hlsl_ctx 
*ctx, struct vkd3d_string_buffer *buffer, -+ const struct hlsl_ir_compile *compile) -+{ -+ unsigned int i; -+ -+ switch (compile->compile_type) -+ { -+ case HLSL_COMPILE_TYPE_COMPILE: -+ vkd3d_string_buffer_printf(buffer, "compile %s {\n", compile->profile->name); -+ break; -+ -+ case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: -+ vkd3d_string_buffer_printf(buffer, "ConstructGSWithSO {\n"); -+ break; -+ } -+ -+ dump_block(ctx, buffer, &compile->instrs); -+ -+ vkd3d_string_buffer_printf(buffer, " %10s } (", ""); -+ for (i = 0; i < compile->args_count; ++i) -+ { -+ dump_src(buffer, &compile->args[i]); -+ if (i + 1 < compile->args_count) -+ vkd3d_string_buffer_printf(buffer, ", "); -+ } -+ vkd3d_string_buffer_printf(buffer, ")"); -+} -+ -+static void dump_ir_sampler_state(struct vkd3d_string_buffer *buffer, -+ const struct hlsl_ir_sampler_state *sampler_state) -+{ -+ vkd3d_string_buffer_printf(buffer, "sampler_state {...}"); -+} -+ - static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, - const struct hlsl_ir_stateblock_constant *constant) - { -@@ -3245,9 +3591,22 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); - break; - -+ case HLSL_IR_COMPILE: -+ dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); -+ break; -+ -+ case HLSL_IR_SAMPLER_STATE: -+ dump_ir_sampler_state(buffer, hlsl_ir_sampler_state(instr)); -+ break; -+ - case HLSL_IR_STATEBLOCK_CONSTANT: - dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); - break; -+ -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", -+ hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); -+ break; - } - } - -@@ -3308,8 +3667,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) - { - struct hlsl_src *src, *next; - -- VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); -- VKD3D_ASSERT(old->data_type->dimy == 
new->data_type->dimy); -+ VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx); -+ VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy); - - LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) - { -@@ -3459,6 +3818,24 @@ static void free_ir_index(struct hlsl_ir_index *index) - vkd3d_free(index); - } - -+static void free_ir_compile(struct hlsl_ir_compile *compile) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < compile->args_count; ++i) -+ hlsl_src_remove(&compile->args[i]); -+ -+ hlsl_block_cleanup(&compile->instrs); -+ vkd3d_free(compile); -+} -+ -+static void free_ir_sampler_state(struct hlsl_ir_sampler_state *sampler_state) -+{ -+ if (sampler_state->state_block) -+ hlsl_free_state_block(sampler_state->state_block); -+ vkd3d_free(sampler_state); -+} -+ - static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) - { - vkd3d_free(constant->name); -@@ -3527,9 +3904,21 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - free_ir_switch(hlsl_ir_switch(node)); - break; - -+ case HLSL_IR_COMPILE: -+ free_ir_compile(hlsl_ir_compile(node)); -+ break; -+ -+ case HLSL_IR_SAMPLER_STATE: -+ free_ir_sampler_state(hlsl_ir_sampler_state(node)); -+ break; -+ - case HLSL_IR_STATEBLOCK_CONSTANT: - free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); - break; -+ -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); -+ break; - } - } - -@@ -3801,12 +4190,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - static const char * const names[] = - { -- "float", -- "half", -- "double", -- "int", -- "uint", -- "bool", -+ [HLSL_TYPE_FLOAT] = "float", -+ [HLSL_TYPE_HALF] = "half", -+ [HLSL_TYPE_DOUBLE] = "double", -+ [HLSL_TYPE_INT] = "int", -+ [HLSL_TYPE_UINT] = "uint", -+ [HLSL_TYPE_BOOL] = "bool", - }; - - static const char *const variants_float[] = {"min10float", "min16float"}; -@@ -3957,6 +4346,7 @@ 
static void declare_predefined_types(struct hlsl_ctx *ctx) - ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); - ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); - ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); -+ ctx->builtin_types.error = hlsl_new_simple_type(ctx, "<error type>", HLSL_CLASS_ERROR); - hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); -@@ -4059,6 +4449,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - - case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: - ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; -+ ctx->double_as_float_alias = option->value & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS; - break; - - case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: -@@ -4078,6 +4469,15 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - } - } - -+ if (!(ctx->error_instr = hlsl_new_error_expr(ctx))) -+ return false; -+ hlsl_block_add_instr(&ctx->static_initializers, ctx->error_instr); -+ -+ ctx->domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; -+ ctx->output_control_point_count = UINT_MAX; -+ ctx->output_primitive = 0; -+ ctx->partitioning = 0; -+ - return true; - } - -@@ -4089,8 +4489,6 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - struct hlsl_type *type, *next_type; - unsigned int i; - -- hlsl_block_cleanup(&ctx->static_initializers); -- - for (i = 0; i < ctx->source_files_count; ++i) - vkd3d_free((void *)ctx->source_files[i]); - vkd3d_free(ctx->source_files); -@@ -4113,6 +4511,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - } - } - -+ 
hlsl_block_cleanup(&ctx->static_initializers); -+ - LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) - { - LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 22e25b23988..4824234ab99 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -22,7 +22,6 @@ - - #include "vkd3d_shader_private.h" - #include "wine/rbtree.h" --#include "d3dcommon.h" - #include "d3dx9shader.h" - - /* The general IR structure is inspired by Mesa GLSL hir, even though the code -@@ -70,6 +69,14 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned - return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; - } - -+static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) -+{ -+ return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), -+ hlsl_swizzle_get_component(swizzle, 1), -+ hlsl_swizzle_get_component(swizzle, 2), -+ hlsl_swizzle_get_component(swizzle, 3)); -+} -+ - enum hlsl_type_class - { - HLSL_CLASS_SCALAR, -@@ -97,8 +104,10 @@ enum hlsl_type_class - HLSL_CLASS_GEOMETRY_SHADER, - HLSL_CLASS_CONSTANT_BUFFER, - HLSL_CLASS_BLEND_STATE, -+ HLSL_CLASS_STREAM_OUTPUT, - HLSL_CLASS_VOID, - HLSL_CLASS_NULL, -+ HLSL_CLASS_ERROR, - }; - - enum hlsl_base_type -@@ -128,10 +137,18 @@ enum hlsl_sampler_dim - HLSL_SAMPLER_DIM_CUBEARRAY, - HLSL_SAMPLER_DIM_BUFFER, - HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, -- HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, -+ HLSL_SAMPLER_DIM_RAW_BUFFER, -+ HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_RAW_BUFFER, - /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. 
*/ - }; - -+enum hlsl_so_object_type -+{ -+ HLSL_STREAM_OUTPUT_POINT_STREAM, -+ HLSL_STREAM_OUTPUT_LINE_STREAM, -+ HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, -+}; -+ - enum hlsl_regset - { - HLSL_REGSET_SAMPLERS, -@@ -210,6 +227,12 @@ struct hlsl_type - } resource; - /* Additional field to distinguish object types. Currently used only for technique types. */ - unsigned int version; -+ /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */ -+ struct -+ { -+ struct hlsl_type *type; -+ enum hlsl_so_object_type so_type; -+ } so; - } e; - - /* Number of numeric register components used by one value of this type, for each regset. -@@ -316,7 +339,12 @@ enum hlsl_ir_node_type - HLSL_IR_STORE, - HLSL_IR_SWIZZLE, - HLSL_IR_SWITCH, -+ -+ HLSL_IR_COMPILE, -+ HLSL_IR_SAMPLER_STATE, - HLSL_IR_STATEBLOCK_CONSTANT, -+ -+ HLSL_IR_VSIR_INSTRUCTION_REF, - }; - - /* Common data for every type of IR instruction node. */ -@@ -352,6 +380,9 @@ struct hlsl_block - { - /* List containing instruction nodes; linked by the hlsl_ir_node.entry fields. */ - struct list instrs; -+ /* Instruction representing the "value" of this block, if applicable. -+ * This may point to an instruction outside of this block! */ -+ struct hlsl_ir_node *value; - }; - - /* A reference to an instruction node (struct hlsl_ir_node), usable as a field in other structs. 
-@@ -396,10 +427,12 @@ struct hlsl_attribute - #define HLSL_MODIFIER_SINGLE 0x00020000 - #define HLSL_MODIFIER_EXPORT 0x00040000 - #define HLSL_STORAGE_ANNOTATION 0x00080000 -+#define HLSL_MODIFIER_UNORM 0x00100000 -+#define HLSL_MODIFIER_SNORM 0x00200000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -- HLSL_MODIFIER_COLUMN_MAJOR) -+ HLSL_MODIFIER_COLUMN_MAJOR | HLSL_MODIFIER_UNORM | HLSL_MODIFIER_SNORM) - - #define HLSL_INTERPOLATION_MODIFIERS_MASK (HLSL_STORAGE_NOINTERPOLATION | HLSL_STORAGE_CENTROID | \ - HLSL_STORAGE_NOPERSPECTIVE | HLSL_STORAGE_LINEAR) -@@ -474,6 +507,8 @@ struct hlsl_ir_var - * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 - * means function entry. */ - unsigned int first_write, last_read; -+ /* Whether the variable is read in any entry function. */ -+ bool is_read; - /* Offset where the variable's value is stored within its buffer in numeric register components. - * This in case the variable is uniform. */ - unsigned int buffer_offset; -@@ -498,6 +533,9 @@ struct hlsl_ir_var - - /* Whether the shader performs dereferences with non-constant offsets in the variable. */ - bool indexable; -+ /* Whether this is a semantic variable that was split from an array, or is the first -+ * element of a struct, and thus needs to be aligned when packed in the signature. */ -+ bool force_align; - - uint32_t is_input_semantic : 1; - uint32_t is_output_semantic : 1; -@@ -591,10 +629,18 @@ struct hlsl_ir_function_decl - unsigned int attr_count; - const struct hlsl_attribute *const *attrs; - -+ bool early_depth_test; -+ - /* Synthetic boolean variable marking whether a return statement has been - * executed. Needed to deal with return statements in non-uniform control - * flow, since some backends can't handle them. 
*/ - struct hlsl_ir_var *early_return_var; -+ -+ /* List of all the extern semantic variables; linked by the -+ * hlsl_ir_var.extern_entry fields. This exists as a convenience because -+ * it is often necessary to iterate all extern variables and these can be -+ * declared in as function parameters, or as the function return value. */ -+ struct list extern_vars; - }; - - struct hlsl_ir_call -@@ -646,6 +692,7 @@ struct hlsl_ir_switch - - enum hlsl_ir_expr_op - { -+ HLSL_OP0_ERROR, - HLSL_OP0_VOID, - HLSL_OP0_RASTERIZER_SAMPLE_COUNT, - -@@ -663,6 +710,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, - HLSL_OP1_F16TOF32, -+ HLSL_OP1_F32TOF16, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, - HLSL_OP1_LOG2, -@@ -703,7 +751,7 @@ enum hlsl_ir_expr_op - HLSL_OP2_SLT, - - /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, -- * then adds c. */ -+ * then adds c, where c must have dimx=1. */ - HLSL_OP3_DP2ADD, - /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. - * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ -@@ -854,6 +902,43 @@ struct hlsl_ir_string_constant - char *string; - }; - -+/* Represents shader compilation call for effects, such as "CompileShader()". -+ * -+ * Unlike hlsl_ir_call, it is not flattened, thus, it keeps track of its -+ * arguments and maintains its own instruction block. */ -+struct hlsl_ir_compile -+{ -+ struct hlsl_ir_node node; -+ -+ enum hlsl_compile_type -+ { -+ /* A shader compilation through the CompileShader() function or the "compile" syntax. */ -+ HLSL_COMPILE_TYPE_COMPILE, -+ /* A call to ConstructGSWithSO(), which receives a geometry shader and retrieves one as well. */ -+ HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, -+ } compile_type; -+ -+ /* Special field to store the profile argument for HLSL_COMPILE_TYPE_COMPILE. 
*/ -+ const struct hlsl_profile_info *profile; -+ -+ /* Block containing the instructions required by the arguments of the -+ * compilation call. */ -+ struct hlsl_block instrs; -+ -+ /* Arguments to the compilation call. For HLSL_COMPILE_TYPE_COMPILE -+ * args[0] is an hlsl_ir_call to the specified function. */ -+ struct hlsl_src *args; -+ unsigned int args_count; -+}; -+ -+/* Represents a state block initialized with the "sampler_state" keyword. */ -+struct hlsl_ir_sampler_state -+{ -+ struct hlsl_ir_node node; -+ -+ struct hlsl_state_block *state_block; -+}; -+ - /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, - * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ - struct hlsl_ir_stateblock_constant -@@ -862,6 +947,16 @@ struct hlsl_ir_stateblock_constant - char *name; - }; - -+/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. -+ * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ -+struct hlsl_ir_vsir_instruction_ref -+{ -+ struct hlsl_ir_node node; -+ -+ /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ -+ unsigned int vsir_instr_idx; -+}; -+ - struct hlsl_scope - { - /* Item entry for hlsl_ctx.scopes. */ -@@ -965,10 +1060,11 @@ struct hlsl_ctx - struct hlsl_scope *dummy_scope; - /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ - struct list scopes; -- /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields. -- * This exists as a convenience because it is often necessary to iterate all extern variables -- * and these can be declared in global scope, as function parameters, or as the function -- * return value. */ -+ -+ /* List of all the extern variables, excluding semantic variables; linked -+ * by the hlsl_ir_var.extern_entry fields. 
This exists as a convenience -+ * because it is often necessary to iterate all extern variables declared -+ * in the global scope or as function parameters. */ - struct list extern_vars; - - /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared -@@ -1003,8 +1099,12 @@ struct hlsl_ctx - struct hlsl_type *string; - struct hlsl_type *Void; - struct hlsl_type *null; -+ struct hlsl_type *error; - } builtin_types; - -+ /* Pre-allocated "error" expression. */ -+ struct hlsl_ir_node *error_instr; -+ - /* List of the instruction nodes for initializing static variables. */ - struct hlsl_block static_initializers; - -@@ -1016,19 +1116,23 @@ struct hlsl_ctx - { - uint32_t index; - struct hlsl_vec4 value; -+ struct vkd3d_shader_location loc; - } *regs; - size_t count, size; - } constant_defs; - /* 'c' registers where the constants expected by SM2 sincos are stored. */ - struct hlsl_reg d3dsincosconst1, d3dsincosconst2; -- /* Number of temp. registers required for the shader to run, i.e. the largest temp register -- * index that will be used in the output bytecode (+1). */ -- uint32_t temp_count; - - /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in - * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ - uint32_t thread_count[3]; - -+ enum vkd3d_tessellator_domain domain; -+ unsigned int output_control_point_count; -+ enum vkd3d_shader_tessellator_output_primitive output_primitive; -+ enum vkd3d_shader_tessellator_partitioning partitioning; -+ struct hlsl_ir_function_decl *patch_constant_func; -+ - /* In some cases we generate opcodes by parsing an HLSL function and then - * invoking it. 
If not NULL, this field is the name of the function that we - * are currently parsing, "mangled" with an internal prefix to avoid -@@ -1044,6 +1148,7 @@ struct hlsl_ctx - bool child_effect; - bool include_empty_buffers; - bool warn_implicit_truncation; -+ bool double_as_float_alias; - }; - - static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -@@ -1149,25 +1254,46 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n - return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); - } - -+static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); -+ return CONTAINING_RECORD(node, struct hlsl_ir_compile, node); -+} -+ -+static inline struct hlsl_ir_sampler_state *hlsl_ir_sampler_state(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_SAMPLER_STATE); -+ return CONTAINING_RECORD(node, struct hlsl_ir_sampler_state, node); -+}; -+ - static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) - { - VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); - return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); - } - -+static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); -+ return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); -+} -+ - static inline void hlsl_block_init(struct hlsl_block *block) - { - list_init(&block->instrs); -+ block->value = NULL; - } - - static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) - { - list_add_tail(&block->instrs, &instr->entry); -+ block->value = (instr->data_type ? 
instr : NULL); - } - - static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) - { - list_move_tail(&block->instrs, &add->instrs); -+ block->value = add->value; - } - - static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) -@@ -1283,6 +1409,7 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) - { - case HLSL_SAMPLER_DIM_1D: - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return 1; - case HLSL_SAMPLER_DIM_1DARRAY: -@@ -1330,12 +1457,15 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); - void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); - -+bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, -+ struct hlsl_state_block_entry *entry); - bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, - const struct vkd3d_shader_location *loc); - struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, -- struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -- unsigned int lhs_index, unsigned int arg_index); -+ const struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -+ unsigned int lhs_index, bool single_arg, unsigned int arg_index); - -+void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); -@@ -1402,6 +1532,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct 
vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); -+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, -+ enum hlsl_so_object_type so_type, struct hlsl_type *type); - struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); - -@@ -1428,6 +1560,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); - bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); - bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); - -+struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, -+ const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, -+ struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -@@ -1440,6 +1575,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count); - struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_sampler_state(struct hlsl_ctx *ctx, -+ const struct hlsl_state_block *state_block, struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, - struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -@@ -1466,6 +1603,9 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, 
unsigned - struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, - struct list *cases, const struct vkd3d_shader_location *loc); - -+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, -+ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); -+ - void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); - void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -@@ -1493,6 +1633,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); - unsigned int hlsl_type_major_size(const struct hlsl_type *type); - unsigned int hlsl_type_element_count(const struct hlsl_type *type); - bool hlsl_type_is_resource(const struct hlsl_type *type); -+bool hlsl_type_is_shader(const struct hlsl_type *type); - unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); - bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); - -@@ -1525,22 +1666,18 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); - D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); --bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -- unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); --bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -- uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); - - void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); - int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct 
vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -+ -+int tpf_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - --bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, -- const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); --bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, enum vkd3d_shader_register_type *type, bool *has_idx); --int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); -+enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, -+ unsigned int storage_modifiers); - - struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 0c02b27817e..31fb30521e9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -74,13 +74,16 @@ ANY (.) 
- BlendState {return KW_BLENDSTATE; } - break {return KW_BREAK; } - Buffer {return KW_BUFFER; } -+ByteAddressBuffer {return KW_BYTEADDRESSBUFFER; } - case {return KW_CASE; } - cbuffer {return KW_CBUFFER; } - centroid {return KW_CENTROID; } - column_major {return KW_COLUMN_MAJOR; } - ComputeShader {return KW_COMPUTESHADER; } - compile {return KW_COMPILE; } -+CompileShader {return KW_COMPILESHADER; } - const {return KW_CONST; } -+ConstructGSWithSO {return KW_CONSTRUCTGSWITHSO; } - continue {return KW_CONTINUE; } - DepthStencilState {return KW_DEPTHSTENCILSTATE; } - DepthStencilView {return KW_DEPTHSTENCILVIEW; } -@@ -88,7 +91,6 @@ default {return KW_DEFAULT; } - discard {return KW_DISCARD; } - DomainShader {return KW_DOMAINSHADER; } - do {return KW_DO; } --double {return KW_DOUBLE; } - else {return KW_ELSE; } - export {return KW_EXPORT; } - extern {return KW_EXTERN; } -@@ -102,6 +104,7 @@ if {return KW_IF; } - in {return KW_IN; } - inline {return KW_INLINE; } - inout {return KW_INOUT; } -+LineStream {return KW_LINESTREAM; } - linear {return KW_LINEAR; } - matrix {return KW_MATRIX; } - namespace {return KW_NAMESPACE; } -@@ -112,6 +115,7 @@ out {return KW_OUT; } - packoffset {return KW_PACKOFFSET; } - pass {return KW_PASS; } - PixelShader {return KW_PIXELSHADER; } -+PointStream {return KW_POINTSTREAM; } - pixelshader {return KW_PIXELSHADER; } - RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } - RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } -@@ -126,6 +130,7 @@ RenderTargetView {return KW_RENDERTARGETVIEW; } - return {return KW_RETURN; } - row_major {return KW_ROW_MAJOR; } - RWBuffer {return KW_RWBUFFER; } -+RWByteAddressBuffer {return KW_RWBYTEADDRESSBUFFER; } - RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } - RWTexture1D {return KW_RWTEXTURE1D; } - RWTexture1DArray {return KW_RWTEXTURE1DARRAY; } -@@ -141,6 +146,7 @@ samplerCUBE {return KW_SAMPLERCUBE; } - SamplerState {return KW_SAMPLER; } - sampler_state {return 
KW_SAMPLER_STATE; } - shared {return KW_SHARED; } -+snorm {return KW_SNORM; } - stateblock {return KW_STATEBLOCK; } - stateblock_state {return KW_STATEBLOCK_STATE; } - static {return KW_STATIC; } -@@ -166,10 +172,12 @@ texture3D {return KW_TEXTURE3D; } - TextureCube {return KW_TEXTURECUBE; } - textureCUBE {return KW_TEXTURECUBE; } - TextureCubeArray {return KW_TEXTURECUBEARRAY; } -+TriangleStream {return KW_TRIANGLESTREAM; } - true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } - unsigned {return KW_UNSIGNED; } - uniform {return KW_UNIFORM; } -+unorm {return KW_UNORM; } - vector {return KW_VECTOR; } - VertexShader {return KW_VERTEXSHADER; } - vertexshader {return KW_VERTEXSHADER; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 3f319dea0d8..03a2f38e4e9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -40,6 +40,7 @@ struct parse_initializer - unsigned int args_count; - struct hlsl_block *instrs; - bool braces; -+ struct vkd3d_shader_location loc; - }; - - struct parse_parameter -@@ -52,7 +53,7 @@ struct parse_parameter - struct parse_initializer initializer; - }; - --struct parse_colon_attribute -+struct parse_colon_attributes - { - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; -@@ -147,7 +148,7 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha - - static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) - { -- return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); -+ return block->value; - } - - static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) -@@ -331,6 +332,9 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node - { - const struct hlsl_type *type = cond->data_type; - -+ if (type->class == HLSL_CLASS_ERROR) -+ return; -+ - if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1) - { - struct 
vkd3d_string_buffer *string; -@@ -437,6 +441,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - if (hlsl_types_are_equal(src_type, dst_type)) - return node; - -+ if (node->type == HLSL_IR_SAMPLER_STATE && dst_type->class == HLSL_CLASS_SAMPLER) -+ return node; -+ - if (!implicit_compatible_data_types(ctx, src_type, dst_type)) - { - struct vkd3d_string_buffer *src_string, *dst_string; -@@ -458,6 +465,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - return add_cast(ctx, block, node, dst_type, loc); - } - -+static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_type *dst_type, const struct parse_array_sizes *arrays, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *instr = node_from_block(block); -+ struct hlsl_type *src_type = instr->data_type; -+ unsigned int i; -+ -+ for (i = 0; i < arrays->count; ++i) -+ { -+ if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); -+ dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); -+ } -+ -+ if (instr->data_type->class == HLSL_CLASS_ERROR) -+ return true; -+ -+ if (!explicit_compatible_data_types(ctx, src_type, dst_type)) -+ { -+ struct vkd3d_string_buffer *src_string, *dst_string; -+ -+ src_string = hlsl_type_to_string(ctx, src_type); -+ dst_string = hlsl_type_to_string(ctx, dst_type); -+ if (src_string && dst_string) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", -+ src_string->buffer, dst_string->buffer); -+ hlsl_release_string_buffer(ctx, src_string); -+ hlsl_release_string_buffer(ctx, dst_string); -+ return false; -+ } -+ -+ return add_cast(ctx, block, instr, dst_type, loc); -+} -+ - static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, - const struct vkd3d_shader_location *loc) - { -@@ 
-489,9 +530,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - check_condition_type(ctx, condition); - - bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); -- if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc))) -+ /* We already checked for a 1-component numeric type, so -+ * add_implicit_conversion() is equivalent to add_cast() here. */ -+ if (!(cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc))) - return false; -- hlsl_block_add_instr(cond_block, cast); - - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) - return false; -@@ -516,7 +558,7 @@ enum loop_type - LOOP_DO_WHILE - }; - --static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) -+static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) - { - unsigned int i, j; - -@@ -525,11 +567,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att - for (j = i + 1; j < attrs->count; ++j) - { - if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) -- return true; -+ hlsl_error(ctx, &attrs->attrs[j]->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Found duplicate attribute \"%s\".", attrs->attrs[j]->name); - } - } -- -- return false; - } - - static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, -@@ -606,12 +647,17 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - struct hlsl_block expr; - struct hlsl_src src; - -+ if (node_from_block(block)->data_type->class == HLSL_CLASS_ERROR) -+ return ret; -+ - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { - switch (node->type) - { -+ case HLSL_IR_COMPILE: - case HLSL_IR_CONSTANT: - case HLSL_IR_EXPR: -+ case HLSL_IR_SAMPLER_STATE: - case HLSL_IR_STRING_CONSTANT: - case HLSL_IR_SWIZZLE: - case HLSL_IR_LOAD: -@@ -632,6 +678,8 @@ static 
struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - break; -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_unreachable(); - } - } - -@@ -639,14 +687,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - return ret; - hlsl_block_add_block(&expr, block); - -- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) -+ if (!(node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc))) - { - hlsl_block_cleanup(&expr); - return ret; - } - - /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ -- hlsl_src_from_node(&src, node_from_block(&expr)); -+ hlsl_src_from_node(&src, node); -+ hlsl_lower_index_loads(ctx, &expr); - hlsl_run_const_passes(ctx, &expr); - node = src.node; - hlsl_src_remove(&src); -@@ -697,9 +746,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - unsigned int i, unroll_limit = 0; - struct hlsl_ir_node *loop; - -- if (attribute_list_has_duplicates(attributes)) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -- -+ check_attribute_list_for_duplicates(ctx, attributes); - check_loop_attributes(ctx, attributes, loc); - - /* Ignore unroll(0) attribute, and any invalid attribute. 
*/ -@@ -897,6 +944,9 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - { - struct hlsl_ir_node *store; - -+ if (return_value->data_type->class == HLSL_CLASS_ERROR) -+ return true; -+ - if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) - return false; - -@@ -974,6 +1024,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str - const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; - struct hlsl_ir_node *return_index, *cast; - -+ if (array->data_type->class == HLSL_CLASS_ERROR || index->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ - if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) - && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { -@@ -1164,6 +1220,33 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - return true; - } - -+static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const char *name, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *record = node_from_block(block); -+ const struct hlsl_type *type = record->data_type; -+ const struct hlsl_struct_field *field, *base; -+ -+ if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name))) -+ { -+ unsigned int field_idx = field - type->e.record.fields; -+ -+ return add_record_access(ctx, block, record, field_idx, loc); -+ } -+ else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super"))) -+ { -+ unsigned int base_idx = base - type->e.record.fields; -+ -+ if (!add_record_access(ctx, block, record, base_idx, loc)) -+ return false; -+ return add_record_access_recurse(ctx, block, name, loc); -+ } -+ -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); -+ block->value = ctx->error_instr; -+ return 
true; -+} -+ - static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) - { - struct parse_variable_def *v, *v_next; -@@ -1227,7 +1310,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, - } - - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, -- struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); -+ struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, -+ bool is_default_values_initializer); - - static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, - struct parse_parameter *param, const struct vkd3d_shader_location *loc) -@@ -1285,7 +1369,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - - for (i = 0; i < param->initializer.args_count; ++i) - { -- initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); -+ initialize_var_components(ctx, param->initializer.instrs, var, -+ &store_index, param->initializer.args[i], true); - } - - free_parse_initializer(¶m->initializer); -@@ -1673,25 +1758,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl - return expr; - } - --static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) -+static bool type_is_integer(enum hlsl_base_type type) - { -- const struct hlsl_type *type = instr->data_type; -- struct vkd3d_string_buffer *string; -- -- switch (type->e.numeric.type) -+ switch (type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- break; -+ return true; - -- default: -- if ((string = hlsl_type_to_string(ctx, type))) -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Expression type '%s' is not integer.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- break; -+ case HLSL_TYPE_DOUBLE: -+ case HLSL_TYPE_FLOAT: -+ case 
HLSL_TYPE_HALF: -+ return false; - } -+ -+ vkd3d_unreachable(); -+} -+ -+static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_type *type = instr->data_type; -+ struct vkd3d_string_buffer *string; -+ -+ if (type_is_integer(type->e.numeric.type)) -+ return; -+ -+ if ((string = hlsl_type_to_string(ctx, type))) -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Expression type '%s' is not integer.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); - } - - static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1699,12 +1795,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; - -+ if (arg->data_type->class == HLSL_CLASS_ERROR) -+ return arg; -+ - return add_expr(ctx, block, op, args, arg->data_type, loc); - } - - static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { -+ if (arg->data_type->class == HLSL_CLASS_ERROR) -+ return arg; -+ - check_integer_type(ctx, arg); - - return add_unary_arithmetic_expr(ctx, block, op, arg, loc); -@@ -1716,6 +1818,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *bool_type; - -+ if (arg->data_type->class == HLSL_CLASS_ERROR) -+ return arg; -+ - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->dimx, arg->data_type->dimy); - -@@ -1745,7 +1850,11 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *common_type; - -- common_type = get_common_numeric_type(ctx, arg1, arg2, loc); -+ if (!(common_type = 
get_common_numeric_type(ctx, arg1, arg2, loc))) -+ { -+ block->value = ctx->error_instr; -+ return block->value; -+ } - - if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; -@@ -1942,6 +2051,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls - hlsl_block_add_block(block1, block2); - destroy_block(block2); - -+ if (arg1->data_type->class == HLSL_CLASS_ERROR || arg2->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block1->value = ctx->error_instr; -+ return block1; -+ } -+ - if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) - return NULL; - -@@ -2048,18 +2163,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un - return true; - } - --static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, -+static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; -- struct hlsl_ir_node *copy; - unsigned int writemask = 0, width = 0; - bool matrix_writemask = false; - -+ if (lhs->data_type->class == HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ - if (assign_op == ASSIGN_OP_SUB) - { - if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) -- return NULL; -+ return false; - assign_op = ASSIGN_OP_ADD; - } - if (assign_op != ASSIGN_OP_ASSIGN) -@@ -2068,7 +2188,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - - VKD3D_ASSERT(op); - if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) -- return NULL; -+ return false; - } - - if (hlsl_is_numeric_type(lhs_type)) -@@ -2078,14 +2198,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - } - - if (!(rhs = 
add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) -- return NULL; -+ return false; - - while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) - { - if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) - { - hlsl_fixme(ctx, &lhs->loc, "Cast on the LHS."); -- return NULL; -+ return false; - } - else if (lhs->type == HLSL_IR_SWIZZLE) - { -@@ -2100,25 +2220,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) - { - hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); -- return NULL; -+ return false; - } - if (!invert_swizzle_matrix(&s, &writemask, &width)) - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); -- return NULL; -+ return false; - } - matrix_writemask = true; - } - else if (!invert_swizzle(&s, &writemask, &width)) - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); -- return NULL; -+ return false; - } - - if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) -- { -- return NULL; -- } -+ return false; - hlsl_block_add_instr(block, new_swizzle); - - lhs = swizzle->val.node; -@@ -2127,7 +2245,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - else - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue."); -- return NULL; -+ return false; - } - } - -@@ -2142,11 +2260,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) - { - hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource store."); -- return NULL; -+ return false; - } - - if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) -- return NULL; -+ return false; - - resource_type = 
hlsl_deref_get_type(ctx, &resource_deref); - VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); -@@ -2168,7 +2286,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) - { - hlsl_cleanup_deref(&resource_deref); -- return NULL; -+ return false; - } - hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&resource_deref); -@@ -2195,13 +2313,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) - { - hlsl_cleanup_deref(&deref); -- return NULL; -+ return false; - } - - if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) - { - hlsl_cleanup_deref(&deref); -- return NULL; -+ return false; - } - hlsl_block_add_block(block, &store_block); - } -@@ -2226,23 +2344,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - continue; - - if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) -- return NULL; -+ return false; - hlsl_block_add_instr(block, c); - - if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) -- return NULL; -+ return false; - hlsl_block_add_instr(block, cell); - - if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) -- return NULL; -+ return false; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -- return NULL; -+ return false; - - if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) - { - hlsl_cleanup_deref(&deref); -- return NULL; -+ return false; - } - hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&deref); -@@ -2254,24 +2372,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - struct hlsl_deref deref; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) -- return NULL; -+ return false; - - if (!(store = 
hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) - { - hlsl_cleanup_deref(&deref); -- return NULL; -+ return false; - } - hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&deref); - } - -- /* Don't use the instruction itself as a source, as this makes structure -- * splitting easier. Instead copy it here. Since we retrieve sources from -- * the last instruction in the list, we do need to copy. */ -- if (!(copy = hlsl_new_copy(ctx, rhs))) -- return NULL; -- hlsl_block_add_instr(block, copy); -- return copy; -+ block->value = rhs; -+ return true; - } - - static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, -@@ -2280,6 +2393,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - struct hlsl_ir_node *lhs = node_from_block(block); - struct hlsl_ir_node *one; - -+ if (lhs->data_type->class == HLSL_CLASS_ERROR) -+ return true; -+ - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, - "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); -@@ -2307,57 +2423,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - return true; - } - --/* For some reason, for matrices, values from default value initializers end up in different -- * components than from regular initializers. Default value initializers fill the matrix in -- * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -- * (top-to-bottom left-to-right), so they have to be adjusted. -- * An exception is that the order of matrix initializers for function parameters are row-major -- * (top-to-bottom left-to-right). 
*/ --static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, -- struct hlsl_type *type, unsigned int index) --{ -- unsigned int element_comp_count, element, x, y, i; -- unsigned int base = 0; -- -- if (ctx->profile->major_version < 4) -- return index; -- -- if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -- return index; -- -- switch (type->class) -- { -- case HLSL_CLASS_MATRIX: -- x = index / type->dimy; -- y = index % type->dimy; -- return y * type->dimx + x; -- -- case HLSL_CLASS_ARRAY: -- element_comp_count = hlsl_type_component_count(type->e.array.type); -- element = index / element_comp_count; -- base = element * element_comp_count; -- return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); -- -- case HLSL_CLASS_STRUCT: -- for (i = 0; i < type->e.record.field_count; ++i) -- { -- struct hlsl_type *field_type = type->e.record.fields[i].type; -- -- element_comp_count = hlsl_type_component_count(field_type); -- if (index - base < element_comp_count) -- return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); -- base += element_comp_count; -- } -- break; -- -- default: -- return index; -- } -- vkd3d_unreachable(); --} -- - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, -- struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) -+ struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, -+ bool is_default_values_initializer) - { - unsigned int src_comp_count = hlsl_type_component_count(src->data_type); - struct hlsl_deref dst_deref; -@@ -2376,38 +2444,107 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (dst->default_values) -+ if (is_default_values_initializer) - { - struct hlsl_default_value default_value = {0}; -- unsigned int 
dst_index; - -- if (!hlsl_clone_block(ctx, &block, instrs)) -- return; -- default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); -- -- if (dst->is_param) -- dst_index = *store_index; -+ if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) -+ { -+ if (hlsl_is_numeric_type(dst_comp_type)) -+ { -+ /* Default values are discarded if they contain an object -+ * literal expression for a numeric component. */ -+ if (dst->default_values) -+ { -+ hlsl_warning(ctx, &src->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE, -+ "Component %u in variable '%s' initializer is object literal. Default values discarded.", -+ k, dst->name); -+ vkd3d_free(dst->default_values); -+ dst->default_values = NULL; -+ } -+ } -+ } - else -- dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ { -+ if (!hlsl_clone_block(ctx, &block, instrs)) -+ return; -+ default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - -- dst->default_values[dst_index] = default_value; -+ if (dst->default_values) -+ dst->default_values[*store_index] = default_value; - -- hlsl_block_cleanup(&block); -+ hlsl_block_cleanup(&block); -+ } - } - else - { -- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -- return; -+ if (src->type == HLSL_IR_SAMPLER_STATE) -+ { -+ /* Sampler states end up in the variable's state_blocks instead of -+ * being used to initialize its value. 
*/ -+ struct hlsl_ir_sampler_state *sampler_state = hlsl_ir_sampler_state(src); -+ -+ if (dst_comp_type->class != HLSL_CLASS_SAMPLER) -+ { -+ struct vkd3d_string_buffer *dst_string; -+ -+ dst_string = hlsl_type_to_string(ctx, dst_comp_type); -+ if (dst_string) -+ hlsl_error(ctx, &src->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Cannot assign sampler_state to %s.", dst_string->buffer); -+ hlsl_release_string_buffer(ctx, dst_string); -+ return; -+ } -+ -+ if (!hlsl_array_reserve(ctx, (void **)&dst->state_blocks, &dst->state_block_capacity, -+ dst->state_block_count + 1, sizeof(*dst->state_blocks))) -+ return; -+ -+ dst->state_blocks[dst->state_block_count] = sampler_state->state_block; -+ sampler_state->state_block = NULL; -+ ++dst->state_block_count; -+ } -+ else -+ { -+ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -+ return; - -- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -- return; -- hlsl_block_add_block(instrs, &block); -+ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -+ return; -+ hlsl_block_add_block(instrs, &block); -+ } - } - - ++*store_index; - } - } - -+static void initialize_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *dst, -+ const struct parse_initializer *initializer, bool is_default_values_initializer) -+{ -+ unsigned int store_index = 0; -+ -+ /* If any of the elements has an error type, then initializer_size() is not -+ * meaningful. 
*/ -+ for (unsigned int i = 0; i < initializer->args_count; ++i) -+ { -+ if (initializer->args[i]->data_type->class == HLSL_CLASS_ERROR) -+ return; -+ } -+ -+ if (initializer_size(initializer) != hlsl_type_component_count(dst->data_type)) -+ { -+ hlsl_error(ctx, &initializer->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", -+ hlsl_type_component_count(dst->data_type), initializer_size(initializer)); -+ return; -+ } -+ -+ for (unsigned int i = 0; i < initializer->args_count; ++i) -+ initialize_var_components(ctx, initializer->instrs, dst, &store_index, -+ initializer->args[i], is_default_values_initializer); -+} -+ - static bool type_has_object_components(const struct hlsl_type *type) - { - if (type->class == HLSL_CLASS_ARRAY) -@@ -2733,13 +2870,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - - if (v->initializer.args_count) - { -- unsigned int store_index = 0; - bool is_default_values_initializer; -- unsigned int size, k; - - is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) - || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) - || ctx->cur_scope->annotations; -+ if (hlsl_get_multiarray_element_type(type)->class == HLSL_CLASS_SAMPLER) -+ is_default_values_initializer = false; -+ if (hlsl_type_is_shader(type)) -+ is_default_values_initializer = false; - - if (is_default_values_initializer) - { -@@ -2769,19 +2908,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - v->initializer.args[0] = node_from_block(v->initializer.instrs); - } - -- size = initializer_size(&v->initializer); -- if (component_count != size) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in initializer, but got %u.", component_count, size); -- free_parse_variable_def(v); -- continue; -- } -- -- for (k = 0; k < v->initializer.args_count; ++k) -- { -- initialize_var_components(ctx, 
v->initializer.instrs, var, &store_index, v->initializer.args[k]); -- } -+ initialize_var(ctx, var, &v->initializer, is_default_values_initializer); - - if (is_default_values_initializer) - { -@@ -2795,6 +2922,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - { - hlsl_block_add_block(initializers, v->initializer.instrs); - } -+ -+ if (var->state_blocks) -+ TRACE("Variable %s has %u state blocks.\n", var->name, var->state_block_count); - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -2835,28 +2965,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - return initializers; - } - --static bool func_is_compatible_match(struct hlsl_ctx *ctx, -- const struct hlsl_ir_function_decl *decl, const struct parse_initializer *args) -+static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *decl, -+ bool is_compile, const struct parse_initializer *args) - { -- unsigned int i; -- -- if (decl->parameters.count < args->args_count) -- return false; -+ unsigned int i, k; - -- for (i = 0; i < args->args_count; ++i) -+ k = 0; -+ for (i = 0; i < decl->parameters.count; ++i) - { -- if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) -+ if (is_compile && !(decl->parameters.vars[i]->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ continue; -+ -+ if (k >= args->args_count) -+ { -+ if (!decl->parameters.vars[i]->default_values) -+ return false; -+ return true; -+ } -+ -+ if (!implicit_compatible_data_types(ctx, args->args[k]->data_type, decl->parameters.vars[i]->data_type)) - return false; -- } - -- if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) -+ ++k; -+ } -+ if (k < args->args_count) - return false; -- - return true; - } - - static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, -- const char *name, const struct parse_initializer 
*args, -+ const char *name, const struct parse_initializer *args, bool is_compile, - const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_function_decl *decl, *compatible_match = NULL; -@@ -2869,7 +3007,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - - LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) - { -- if (func_is_compatible_match(ctx, decl, args)) -+ if (func_is_compatible_match(ctx, decl, is_compile, args)) - { - if (compatible_match) - { -@@ -2890,26 +3028,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc - return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); - } - --static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -- const struct parse_initializer *args, const struct vkd3d_shader_location *loc) -+static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, const struct parse_initializer *args, -+ bool is_compile, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *call; -- unsigned int i, j; -+ unsigned int i, j, k; - - VKD3D_ASSERT(args->args_count <= func->parameters.count); - -- for (i = 0; i < args->args_count; ++i) -+ k = 0; -+ for (i = 0; i < func->parameters.count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; -- struct hlsl_ir_node *arg = args->args[i]; -+ struct hlsl_ir_node *arg; -+ -+ if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ continue; -+ -+ if (k >= args->args_count) -+ break; -+ arg = args->args[k]; - - if (!hlsl_types_are_equal(arg->data_type, param->data_type)) - { - struct hlsl_ir_node *cast; - - if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -- return false; -- args->args[i] = cast; -+ return NULL; -+ args->args[k] = cast; - arg = cast; - } - -@@ -2918,13 +3065,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct 
hlsl_ir_function_decl *fu - struct hlsl_ir_node *store; - - if (!(store = hlsl_new_simple_store(ctx, param, arg))) -- return false; -+ return NULL; - hlsl_block_add_instr(args->instrs, store); - } -+ -+ ++k; - } - - /* Add default values for the remaining parameters. */ -- for (i = args->args_count; i < func->parameters.count; ++i) -+ for (; i < func->parameters.count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; - unsigned int comp_count = hlsl_type_component_count(param->data_type); -@@ -2932,6 +3081,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - - VKD3D_ASSERT(param->default_values); - -+ if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ continue; -+ - hlsl_init_simple_deref_from_var(¶m_deref, param); - - for (j = 0; j < comp_count; ++j) -@@ -2945,20 +3097,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - { - value.u[0] = param->default_values[j].number; - if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) -- return false; -+ return NULL; - hlsl_block_add_instr(args->instrs, comp); - - if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) -- return false; -+ return NULL; - hlsl_block_add_block(args->instrs, &store_block); - } - } - } - - if (!(call = hlsl_new_call(ctx, func, loc))) -- return false; -+ return NULL; - hlsl_block_add_instr(args->instrs, call); - -+ if (is_compile) -+ return call; -+ - for (i = 0; i < args->args_count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; -@@ -2973,11 +3128,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - "Output argument to \"%s\" is const.", func->func->name); - - if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) -- return false; -+ return NULL; - hlsl_block_add_instr(args->instrs, &load->node); - - if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) -- return false; -+ return NULL; - } - } - 
-@@ -2998,7 +3153,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - hlsl_block_add_instr(args->instrs, expr); - } - -- return true; -+ return call; - } - - static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, -@@ -3006,7 +3161,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - { - struct hlsl_type *type = arg->data_type; - -- if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) -+ if (!type_is_integer(type->e.numeric.type)) - return arg; - - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -@@ -3094,14 +3249,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, - static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base_type; - struct hlsl_type *type; - - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; -- -- base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -- type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -+ if (type_is_integer(type->e.numeric.type)) -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - - return convert_args(ctx, params, type, loc); - } -@@ -3129,6 +3282,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) - { - struct hlsl_ir_function_decl *func; -+ struct hlsl_ir_node *arg; - struct hlsl_type *type; - char *body; - -@@ -3152,8 +3306,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, - - const char *fn_name = asin_mode ? 
fn_name_asin : fn_name_acos; - -- type = params->args[0]->data_type; -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ type = arg->data_type; - - if (!(body = hlsl_sprintf_alloc(ctx, template, - type->name, fn_name, type->name, -@@ -3165,7 +3320,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_acos(struct hlsl_ctx *ctx, -@@ -3282,9 +3437,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, - " : poly_approx;\n" - "}"; - -- if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) - return false; -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = params->args[0]->data_type; - - if (!(buf = hlsl_get_string_buffer(ctx))) - return false; -@@ -3314,7 +3469,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_atan(struct hlsl_ctx *ctx, -@@ -3507,7 +3662,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_cosh(struct hlsl_ctx *ctx, -@@ -3525,9 +3680,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - struct hlsl_type *cast_type; - enum hlsl_base_type base; - -- if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) -- base = HLSL_TYPE_HALF; -- else -+ base = expr_common_base_type(arg1->data_type->e.numeric.type, 
arg2->data_type->e.numeric.type); -+ if (type_is_integer(base)) - base = HLSL_TYPE_FLOAT; - - cast_type = hlsl_get_vector_type(ctx, base, 3); -@@ -3698,15 +3852,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, - return false; - } - -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) -+ return false; -+ - dim = min(type->dimx, type->dimy); - if (dim == 1) -- { -- if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) -- return false; - return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); -- } - -- typename = type->e.numeric.type == HLSL_TYPE_HALF ? "half" : "float"; -+ typename = hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type)->name; - template = templates[dim]; - - switch (dim) -@@ -3734,7 +3887,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_distance(struct hlsl_ctx *ctx, -@@ -3766,6 +3919,50 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, - return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); - } - -+static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type, *vec4_type; -+ char *body; -+ -+ static const char template[] = -+ "%s dst(%s i0, %s i1)\n" -+ "{\n" -+ /* Scalars and vector-4s are both valid inputs, so promote scalars -+ * if necessary. 
*/ -+ " %s src0 = i0, src1 = i1;\n" -+ " return %s(1, src0.y * src1.y, src0.z, src1.w);\n" -+ "}"; -+ -+ if (!elementwise_intrinsic_convert_args(ctx, params, loc)) -+ return false; -+ type = params->args[0]->data_type; -+ if (!(type->class == HLSL_CLASS_SCALAR -+ || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4))) -+ { -+ struct vkd3d_string_buffer *string; -+ if ((string = hlsl_type_to_string(ctx, type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong dimension for dst(): expected scalar or 4-dimensional vector, but got %s.", -+ string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ vec4_type = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ vec4_type->name, type->name, type->name, -+ vec4_type->name, -+ vec4_type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "dst", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return !!add_user_call(ctx, func, params, false, loc); -+} -+ - static bool intrinsic_exp(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3809,9 +4006,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, - " return dot(i, ng) < 0 ? 
n : -n;\n" - "}\n"; - -- if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) - return false; -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = params->args[0]->data_type; - - if (!(body = hlsl_sprintf_alloc(ctx, template, - type->name, type->name, type->name, type->name))) -@@ -3821,7 +4018,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, -@@ -3839,6 +4036,21 @@ static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, - return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); - } - -+static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *type; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ -+ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); -+} -+ - static bool intrinsic_floor(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3926,7 +4138,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_ldexp(struct hlsl_ctx *ctx, -@@ -4029,7 +4241,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) - return false; - -- return add_user_call(ctx, func, params, loc); -+ 
return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_log(struct hlsl_ctx *ctx, -@@ -4081,6 +4293,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); - } - -+static bool intrinsic_mad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; -+ -+ if (!elementwise_intrinsic_convert_args(ctx, params, loc)) -+ return false; -+ -+ args[0] = params->args[0]; -+ args[1] = params->args[1]; -+ args[2] = params->args[2]; -+ return add_expr(ctx, params->instrs, HLSL_OP3_MAD, args, args[0]->data_type, loc); -+} -+ - static bool intrinsic_max(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4099,6 +4325,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); - } - -+static bool intrinsic_modf(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s modf(%s x, out %s ip)\n" -+ "{\n" -+ " ip = trunc(x);\n" -+ " return x - ip;\n" -+ "}"; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ type = params->args[0]->data_type; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "modf", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return !!add_user_call(ctx, func, params, false, loc); -+} -+ - static bool intrinsic_mul(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4285,13 +4540,9 @@ 
static bool intrinsic_reflect(struct hlsl_ctx *ctx, - static bool intrinsic_refract(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_type *r_type = params->args[0]->data_type; -- struct hlsl_type *n_type = params->args[1]->data_type; -- struct hlsl_type *i_type = params->args[2]->data_type; -- struct hlsl_type *res_type, *idx_type, *scal_type; -- struct parse_initializer mut_params; -+ struct hlsl_type *type, *scalar_type; - struct hlsl_ir_function_decl *func; -- enum hlsl_base_type base; -+ struct hlsl_ir_node *index; - char *body; - - static const char template[] = -@@ -4303,28 +4554,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" - "}"; - -- if (r_type->class == HLSL_CLASS_MATRIX -- || n_type->class == HLSL_CLASS_MATRIX -- || i_type->class == HLSL_CLASS_MATRIX) -+ if (params->args[0]->data_type->class == HLSL_CLASS_MATRIX -+ || params->args[1]->data_type->class == HLSL_CLASS_MATRIX -+ || params->args[2]->data_type->class == HLSL_CLASS_MATRIX) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); - return false; - } - -- VKD3D_ASSERT(params->args_count == 3); -- mut_params = *params; -- mut_params.args_count = 2; -- if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) -+ /* This is technically not an elementwise intrinsic, but the first two -+ * arguments are. -+ * The third argument is a scalar, but can be passed as a vector, -+ * which should generate an implicit truncation warning. -+ * Cast down to scalar explicitly, then we can just use -+ * elementwise_intrinsic_float_convert_args(). -+ * This may result in casting the scalar back to a vector, -+ * which we will only use the first component of. 
*/ -+ -+ scalar_type = hlsl_get_scalar_type(ctx, params->args[2]->data_type->e.numeric.type); -+ if (!(index = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc))) - return false; -+ params->args[2] = index; - -- base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); -- base = base == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -- res_type = convert_numeric_type(ctx, res_type, base); -- idx_type = convert_numeric_type(ctx, i_type, base); -- scal_type = hlsl_get_scalar_type(ctx, base); -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ type = params->args[0]->data_type; - -- if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, -- res_type->name, idx_type->name, scal_type->name))) -+ if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, -+ type->name, type->name, scalar_type->name))) - return false; - - func = hlsl_compile_internal_function(ctx, "refract", body); -@@ -4332,7 +4589,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_round(struct hlsl_ctx *ctx, -@@ -4415,6 +4672,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); - } - -+static bool intrinsic_sincos(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "void sincos(%s f, out %s s, out %s c)\n" -+ "{\n" -+ " s = sin(f);\n" -+ " c = cos(f);\n" -+ "}"; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ type = params->args[0]->data_type; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ 
type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "sincos", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return !!add_user_call(ctx, func, params, false, loc); -+} -+ - static bool intrinsic_sinh(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4436,9 +4722,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - " return (p * p) * (3 - 2 * p);\n" - "}"; - -- if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) - return false; -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = params->args[0]->data_type; - - if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) - return false; -@@ -4447,7 +4733,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_sqrt(struct hlsl_ctx *ctx, -@@ -4469,13 +4755,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, - - if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) - return false; -+ type = params->args[0]->data_type; - - if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, - params->args[1], params->args[0], loc))) - return false; - -- type = ge->data_type; -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); - } - -@@ -4523,7 +4808,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, - if (!func) - return false; - -- return add_user_call(ctx, func, params, loc); -+ return !!add_user_call(ctx, func, params, false, loc); - } - - static bool intrinsic_tex(struct hlsl_ctx *ctx, const 
struct parse_initializer *params, -@@ -4661,17 +4946,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) - return false; - -- initialize_var_components(ctx, params->instrs, var, &idx, coords); -+ initialize_var_components(ctx, params->instrs, var, &idx, coords, false); - if (hlsl_version_ge(ctx, 4, 0)) - { - if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) - return false; - hlsl_block_add_instr(params->instrs, half); - -- initialize_var_components(ctx, params->instrs, var, &idx, half); -+ initialize_var_components(ctx, params->instrs, var, &idx, half, false); - } - else -- initialize_var_components(ctx, params->instrs, var, &idx, coords); -+ initialize_var_components(ctx, params->instrs, var, &idx, coords, false); - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return false; -@@ -4890,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_ir_node *expr; - -+ if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -+ - if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, - operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) - return false; -@@ -4937,9 +5226,11 @@ intrinsic_functions[] = - {"determinant", 1, true, intrinsic_determinant}, - {"distance", 2, true, intrinsic_distance}, - {"dot", 2, true, intrinsic_dot}, -+ {"dst", 2, true, intrinsic_dst}, - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, - {"f16tof32", 1, true, intrinsic_f16tof32}, -+ {"f32tof16", 1, true, intrinsic_f32tof16}, - {"faceforward", 3, true, intrinsic_faceforward}, - {"floor", 1, true, intrinsic_floor}, - 
{"fmod", 2, true, intrinsic_fmod}, -@@ -4952,8 +5243,10 @@ intrinsic_functions[] = - {"log", 1, true, intrinsic_log}, - {"log10", 1, true, intrinsic_log10}, - {"log2", 1, true, intrinsic_log2}, -+ {"mad", 3, true, intrinsic_mad}, - {"max", 2, true, intrinsic_max}, - {"min", 2, true, intrinsic_min}, -+ {"modf", 2, true, intrinsic_modf}, - {"mul", 2, true, intrinsic_mul}, - {"normalize", 1, true, intrinsic_normalize}, - {"pow", 2, true, intrinsic_pow}, -@@ -4966,6 +5259,7 @@ intrinsic_functions[] = - {"saturate", 1, true, intrinsic_saturate}, - {"sign", 1, true, intrinsic_sign}, - {"sin", 1, true, intrinsic_sin}, -+ {"sincos", 3, true, intrinsic_sincos}, - {"sinh", 1, true, intrinsic_sinh}, - {"smoothstep", 3, true, intrinsic_smoothstep}, - {"sqrt", 1, true, intrinsic_sqrt}, -@@ -5002,9 +5296,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, - struct intrinsic_function *intrinsic; - struct hlsl_ir_function_decl *decl; - -- if ((decl = find_function_call(ctx, name, args, loc))) -+ for (unsigned int i = 0; i < args->args_count; ++i) -+ { -+ if (args->args[i]->data_type->class == HLSL_CLASS_ERROR) -+ { -+ args->instrs->value = ctx->error_instr; -+ return args->instrs; -+ } -+ } -+ -+ if ((decl = find_function_call(ctx, name, args, false, loc))) - { -- if (!add_user_call(ctx, decl, args, loc)) -+ if (!add_user_call(ctx, decl, args, false, loc)) - goto fail; - } - else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), -@@ -5060,18 +5363,94 @@ fail: - return NULL; - } - -+static struct hlsl_block *add_shader_compilation(struct hlsl_ctx *ctx, const char *profile_name, -+ const char *function_name, struct parse_initializer *args, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *compile, *call_to_compile = NULL; -+ struct hlsl_ir_function_decl *decl; -+ -+ if (!ctx->in_state_block && ctx->cur_scope != ctx->globals) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE, -+ 
"Shader compilation statements must be in global scope or a state block."); -+ free_parse_initializer(args); -+ return NULL; -+ } -+ -+ if (!(decl = find_function_call(ctx, function_name, args, true, loc))) -+ { -+ if (rb_get(&ctx->functions, function_name)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, -+ "No compatible \"%s\" declaration with %u uniform parameters found.", -+ function_name, args->args_count); -+ } -+ else -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, -+ "Function \"%s\" is not defined.", function_name); -+ } -+ free_parse_initializer(args); -+ return NULL; -+ } -+ -+ if (!(call_to_compile = add_user_call(ctx, decl, args, true, loc))) -+ { -+ free_parse_initializer(args); -+ return NULL; -+ } -+ -+ if (!(compile = hlsl_new_compile(ctx, HLSL_COMPILE_TYPE_COMPILE, -+ profile_name, &call_to_compile, 1, args->instrs, loc))) -+ { -+ free_parse_initializer(args); -+ return NULL; -+ } -+ -+ free_parse_initializer(args); -+ return make_block(ctx, compile); -+} -+ -+static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, -+ struct parse_initializer *args, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *compile; -+ -+ switch (compile_type) -+ { -+ case HLSL_COMPILE_TYPE_COMPILE: -+ vkd3d_unreachable(); -+ -+ case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: -+ if (args->args_count != 2 && args->args_count != 6) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to ConstructGSWithSO: expected 2 or 6, but got %u.", -+ args->args_count); -+ } -+ break; -+ } -+ -+ if (!(compile = hlsl_new_compile(ctx, compile_type, NULL, args->args, args->args_count, args->instrs, loc))) -+ { -+ free_parse_initializer(args); -+ return NULL; -+ } -+ -+ free_parse_initializer(args); -+ return make_block(ctx, compile); -+} -+ - static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, - struct 
parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; -- unsigned int i, idx = 0; - - if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) - return NULL; - -- for (i = 0; i < params->args_count; ++i) -- initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); -+ initialize_var(ctx, var, params, false); - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -@@ -5088,6 +5467,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_type *cond_type = cond->data_type; - struct hlsl_type *common_type; - -+ if (cond->data_type->class == HLSL_CLASS_ERROR -+ || first->data_type->class == HLSL_CLASS_ERROR -+ || second->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ - if (cond_type->class > HLSL_CLASS_LAST_NUMERIC) - { - struct vkd3d_string_buffer *string; -@@ -5113,11 +5500,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - } - else - { -- cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -- cond_type->dimx, cond_type->dimy); -- if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -- return false; -- - if (common_type->dimx == 1 && common_type->dimy == 1) - { - common_type = hlsl_get_numeric_type(ctx, cond_type->class, -@@ -5139,6 +5521,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - hlsl_release_string_buffer(ctx, cond_string); - hlsl_release_string_buffer(ctx, value_string); - } -+ -+ cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, -+ common_type->dimx, common_type->dimy); -+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -+ return false; - } - - if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -@@ -5196,6 +5583,7 @@ static unsigned int hlsl_offset_dim_count(enum 
hlsl_sampler_dim dim) - case HLSL_SAMPLER_DIM_CUBE: - case HLSL_SAMPLER_DIM_CUBEARRAY: - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - /* Offset parameters not supported for these types. */ - return 0; - default: -@@ -5215,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct - return false; - } - -+static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; -+ struct hlsl_ir_node *load; -+ unsigned int value_dim; -+ -+ if (params->args_count != 1 && params->args_count != 2) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.", -+ params->args_count); -+ return false; -+ } -+ -+ if (params->args_count == 2) -+ { -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ return false; -+ } -+ -+ if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name); -+ return false; -+ } -+ -+ if (!strcmp(name, "Load")) -+ value_dim = 1; -+ else if (!strcmp(name, "Load2")) -+ value_dim = 2; -+ else if (!strcmp(name, "Load3")) -+ value_dim = 3; -+ else -+ value_dim = 4; -+ -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ -+ load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim); -+ load_params.resource = object; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ - static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block 
*block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -5224,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *load; - bool multisampled; - -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ return add_raw_load_method_call(ctx, block, object, name, params, loc); -+ - if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name); -@@ -5813,32 +6253,88 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block - return true; - } - -+static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *offset, *rhs, *store; -+ struct hlsl_deref resource_deref; -+ unsigned int value_dim; -+ -+ if (params->args_count != 2) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected 2.", name); -+ return false; -+ } -+ -+ if (!strcmp(name, "Store")) -+ value_dim = 1; -+ else if (!strcmp(name, "Store2")) -+ value_dim = 2; -+ else if (!strcmp(name, "Store3")) -+ value_dim = 3; -+ else -+ value_dim = 4; -+ -+ if (!(offset = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ -+ if (!(rhs = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc))) -+ return false; -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object)) -+ return false; -+ -+ if (!(store = hlsl_new_resource_store(ctx, &resource_deref, offset, rhs, loc))) -+ { -+ hlsl_cleanup_deref(&resource_deref); -+ return false; -+ } -+ -+ 
hlsl_block_add_instr(block, store); -+ hlsl_cleanup_deref(&resource_deref); -+ -+ return true; -+} -+ - static const struct method_function - { - const char *name; - bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); -- bool valid_dims[HLSL_SAMPLER_DIM_MAX + 1]; -+ char valid_dims[HLSL_SAMPLER_DIM_MAX + 1]; - } --object_methods[] = -+texture_methods[] = - { -- /* g c 1d 2d 3d cube 1darr 2darr 2dms 2dmsarr cubearr buff sbuff*/ -- { "Gather", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherAlpha", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherBlue", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherGreen", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherRed", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -+ { "Gather", add_gather_method_call, "00010101001000" }, -+ { "GatherAlpha", add_gather_method_call, "00010101001000" }, -+ { "GatherBlue", add_gather_method_call, "00010101001000" }, -+ { "GatherGreen", add_gather_method_call, "00010101001000" }, -+ { "GatherRed", add_gather_method_call, "00010101001000" }, -+ -+ { "GetDimensions", add_getdimensions_method_call, "00111111111110" }, - -- { "GetDimensions", add_getdimensions_method_call, {0,0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, -+ { "Load", add_load_method_call, "00111011110111" }, -+ { "Load2", add_raw_load_method_call, "00000000000001" }, -+ { "Load3", add_raw_load_method_call, "00000000000001" }, -+ { "Load4", add_raw_load_method_call, "00000000000001" }, - -- { "Load", add_load_method_call, {0,0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1}}, -+ { "Sample", add_sample_method_call, "00111111001000" }, -+ { "SampleBias", add_sample_lod_method_call, "00111111001000" }, -+ { "SampleCmp", add_sample_cmp_method_call, "00111111001000" }, -+ { 
"SampleCmpLevelZero", add_sample_cmp_method_call, "00111111001000" }, -+ { "SampleGrad", add_sample_grad_method_call, "00111111001000" }, -+ { "SampleLevel", add_sample_lod_method_call, "00111111001000" }, -+}; - -- { "Sample", add_sample_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleBias", add_sample_lod_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleCmp", add_sample_cmp_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleCmpLevelZero", add_sample_cmp_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleGrad", add_sample_grad_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleLevel", add_sample_lod_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -+static const struct method_function uav_methods[] = -+{ -+ { "Store", add_store_method_call, "00000000000001" }, -+ { "Store2", add_store_method_call, "00000000000001" }, -+ { "Store3", add_store_method_call, "00000000000001" }, -+ { "Store4", add_store_method_call, "00000000000001" }, - }; - - static int object_method_function_name_compare(const void *a, const void *b) -@@ -5852,9 +6348,35 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -- const struct method_function *method; -+ const struct method_function *method, *methods; -+ unsigned int count; -+ -+ if (object_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ -+ for (unsigned int i = 0; i < params->args_count; ++i) -+ { -+ if (params->args[i]->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ } - -- if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ if (object_type->class == HLSL_CLASS_TEXTURE) -+ { -+ count = 
ARRAY_SIZE(texture_methods); -+ methods = texture_methods; -+ } -+ else if (object_type->class == HLSL_CLASS_UAV) -+ { -+ count = ARRAY_SIZE(uav_methods); -+ methods = uav_methods; -+ } -+ else - { - struct vkd3d_string_buffer *string; - -@@ -5865,10 +6387,10 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - return false; - } - -- method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), sizeof(*method), -+ method = bsearch(name, methods, count, sizeof(*method), - object_method_function_name_compare); - -- if (method && method->valid_dims[object_type->sampler_dim]) -+ if (method && method->valid_dims[object_type->sampler_dim] == '1') - { - return method->handler(ctx, block, object, name, params, loc); - } -@@ -5995,16 +6517,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - hlsl_release_string_buffer(ctx, string); - } - --static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) --{ -- if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, -- sizeof(*state_block->entries))) -- return false; -- -- state_block->entries[state_block->count++] = entry; -- return true; --} -- - } - - %locations -@@ -6037,10 +6549,11 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - struct parse_if_body if_body; - enum parse_assign_op assign_op; - struct hlsl_reg_reservation reg_reservation; -- struct parse_colon_attribute colon_attribute; -+ struct parse_colon_attributes colon_attributes; - struct hlsl_semantic semantic; - enum hlsl_buffer_type buffer_type; - enum hlsl_sampler_dim sampler_dim; -+ enum hlsl_so_object_type so_type; - struct hlsl_attribute *attr; - struct parse_attribute_list attr_list; - struct hlsl_ir_switch_case *switch_case; -@@ -6052,14 +6565,17 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_BLENDSTATE - 
%token KW_BREAK - %token KW_BUFFER -+%token KW_BYTEADDRESSBUFFER - %token KW_CASE - %token KW_CONSTANTBUFFER - %token KW_CBUFFER - %token KW_CENTROID - %token KW_COLUMN_MAJOR - %token KW_COMPILE -+%token KW_COMPILESHADER - %token KW_COMPUTESHADER - %token KW_CONST -+%token KW_CONSTRUCTGSWITHSO - %token KW_CONTINUE - %token KW_DEFAULT - %token KW_DEPTHSTENCILSTATE -@@ -6067,7 +6583,6 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_DISCARD - %token KW_DO - %token KW_DOMAINSHADER --%token KW_DOUBLE - %token KW_ELSE - %token KW_EXPORT - %token KW_EXTERN -@@ -6082,6 +6597,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_INLINE - %token KW_INOUT - %token KW_LINEAR -+%token KW_LINESTREAM - %token KW_MATRIX - %token KW_NAMESPACE - %token KW_NOINTERPOLATION -@@ -6091,6 +6607,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_PACKOFFSET - %token KW_PASS - %token KW_PIXELSHADER -+%token KW_POINTSTREAM - %token KW_RASTERIZERORDEREDBUFFER - %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER - %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -6104,6 +6621,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_REGISTER - %token KW_ROW_MAJOR - %token KW_RWBUFFER -+%token KW_RWBYTEADDRESSBUFFER - %token KW_RWSTRUCTUREDBUFFER - %token KW_RWTEXTURE1D - %token KW_RWTEXTURE1DARRAY -@@ -6118,6 +6636,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_SAMPLER_STATE - %token KW_SAMPLERCOMPARISONSTATE - %token KW_SHARED -+%token KW_SNORM - %token KW_STATEBLOCK - %token KW_STATEBLOCK_STATE - %token KW_STATIC -@@ -6138,10 +6657,12 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_TEXTURE3D - %token KW_TEXTURECUBE - %token KW_TEXTURECUBEARRAY -+%token KW_TRIANGLESTREAM - %token KW_TRUE - %token KW_TYPEDEF - %token KW_UNSIGNED - 
%token KW_UNIFORM -+%token KW_UNORM - %token KW_VECTOR - %token KW_VERTEXSHADER - %token KW_VOID -@@ -6230,7 +6751,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - - %type <buffer_type> buffer_type - --%type <colon_attribute> colon_attribute -+%type <colon_attributes> colon_attributes - - %type <fields> field - %type <fields> fields_list -@@ -6267,12 +6788,15 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - - %type <semantic> semantic - -+%type <so_type> so_type -+ - %type <state_block> state_block - - %type <state_block_index> state_block_index_opt - - %type <switch_case> switch_case - -+%type <type> base_optional - %type <type> field_type - %type <type> named_struct_spec - %type <type> unnamed_struct_spec -@@ -6280,6 +6804,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %type <type> type - %type <type> type_no_void - %type <type> typedef_type -+%type <type> resource_format - - %type <variable_def> state_block_list - %type <variable_def> type_spec -@@ -6416,7 +6941,7 @@ effect_group: - } - - buffer_declaration: -- var_modifiers buffer_type any_identifier colon_attribute annotations_opt -+ var_modifiers buffer_type any_identifier colon_attributes annotations_opt - { - if ($4.semantic.name) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); -@@ -6487,11 +7012,28 @@ struct_spec: - | unnamed_struct_spec - - named_struct_spec: -- KW_STRUCT any_identifier '{' fields_list '}' -+ KW_STRUCT any_identifier base_optional '{' fields_list '}' - { - bool ret; - -- $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count); -+ if ($3) -+ { -+ char *name; -+ -+ if (!(name = hlsl_strdup(ctx, "$super"))) -+ YYABORT; -+ if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields))) -+ YYABORT; -+ memmove(&$5.fields[1], $5.fields, $5.count * sizeof(*$5.fields)); -+ ++$5.count; -+ -+ 
memset(&$5.fields[0], 0, sizeof($5.fields[0])); -+ $5.fields[0].type = $3; -+ $5.fields[0].loc = @3; -+ $5.fields[0].name = name; -+ } -+ -+ $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count); - - if (hlsl_get_var(ctx->cur_scope, $2)) - { -@@ -6518,6 +7060,23 @@ any_identifier: - | TYPE_IDENTIFIER - | NEW_IDENTIFIER - -+/* TODO: Multiple inheritance support for interfaces. */ -+base_optional: -+ %empty -+ { -+ $$ = NULL; -+ } -+ | ':' TYPE_IDENTIFIER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); -+ if ($$->class != HLSL_CLASS_STRUCT) -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2); -+ YYABORT; -+ } -+ vkd3d_free($2); -+ } -+ - fields_list: - %empty - { -@@ -6707,7 +7266,7 @@ func_declaration: - - func_prototype_no_attrs: - /* var_modifiers is necessary to avoid shift/reduce conflicts. */ -- var_modifiers type var_identifier '(' parameters ')' colon_attribute -+ var_modifiers type var_identifier '(' parameters ')' colon_attributes - { - uint32_t modifiers = $1; - struct hlsl_ir_var *var; -@@ -6827,6 +7386,8 @@ func_prototype: - func_prototype_no_attrs - | attribute_list func_prototype_no_attrs - { -+ check_attribute_list_for_duplicates(ctx, &$1); -+ - if ($2.first) - { - $2.decl->attr_count = $1.count; -@@ -6882,28 +7443,39 @@ var_identifier: - VAR_IDENTIFIER - | NEW_IDENTIFIER - --colon_attribute: -+colon_attributes: - %empty - { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation.reg_type = 0; - $$.reg_reservation.offset_type = 0; - } -- | semantic -+ | colon_attributes semantic - { -- $$.semantic = $1; -- $$.reg_reservation.reg_type = 0; -- $$.reg_reservation.offset_type = 0; -+ hlsl_cleanup_semantic(&$$.semantic); -+ $$.semantic = $2; - } -- | register_reservation -+ | colon_attributes register_reservation - { -- $$.semantic = (struct hlsl_semantic){0}; -- $$.reg_reservation = $1; -+ if ($$.reg_reservation.reg_type) -+ hlsl_fixme(ctx, &@2, "Multiple register() 
reservations."); -+ -+ $$.reg_reservation.reg_type = $2.reg_type; -+ $$.reg_reservation.reg_index = $2.reg_index; -+ $$.reg_reservation.reg_space = $2.reg_space; - } -- | packoffset_reservation -+ | colon_attributes packoffset_reservation - { -- $$.semantic = (struct hlsl_semantic){0}; -- $$.reg_reservation = $1; -+ if (ctx->cur_buffer == ctx->globals_buffer) -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "The packoffset() reservation is only allowed within 'cbuffer' blocks."); -+ } -+ else -+ { -+ $$.reg_reservation.offset_type = $2.offset_type; -+ $$.reg_reservation.offset_index = $2.offset_index; -+ } - } - - semantic: -@@ -7099,7 +7671,7 @@ parameter: - } - - parameter_decl: -- var_modifiers type_no_void any_identifier arrays colon_attribute -+ var_modifiers type_no_void any_identifier arrays colon_attributes - { - uint32_t modifiers = $1; - struct hlsl_type *type; -@@ -7239,6 +7811,29 @@ rov_type: - $$ = HLSL_SAMPLER_DIM_3D; - } - -+so_type: -+ KW_POINTSTREAM -+ { -+ $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; -+ } -+ | KW_LINESTREAM -+ { -+ $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; -+ } -+ | KW_TRIANGLESTREAM -+ { -+ $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; -+ } -+ -+resource_format: -+ var_modifiers type -+ { -+ uint32_t modifiers = $1; -+ -+ if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, false, &@1))) -+ YYABORT; -+ } -+ - type_no_void: - KW_VECTOR '<' type ',' C_INTEGER '>' - { -@@ -7332,18 +7927,18 @@ type_no_void: - { - $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); - } -- | texture_type '<' type '>' -+ | texture_type '<' resource_format '>' - { - validate_texture_format_type(ctx, $3, &@3); - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } -- | texture_ms_type '<' type '>' -+ | texture_ms_type '<' resource_format '>' - { - validate_texture_format_type(ctx, $3, &@3); - - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } -- | texture_ms_type '<' type ',' shift_expr '>' -+ | texture_ms_type '<' 
resource_format ',' shift_expr '>' - { - unsigned int sample_count; - struct hlsl_block block; -@@ -7359,16 +7954,28 @@ type_no_void: - - $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); - } -- | uav_type '<' type '>' -+ | KW_BYTEADDRESSBUFFER -+ { -+ $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0); -+ } -+ | uav_type '<' resource_format '>' - { - validate_uav_type(ctx, $1, $3, &@3); - $$ = hlsl_new_uav_type(ctx, $1, $3, false); - } -- | rov_type '<' type '>' -+ | rov_type '<' resource_format '>' - { -- validate_uav_type(ctx, $1, $3, &@3); -+ validate_uav_type(ctx, $1, $3, &@4); - $$ = hlsl_new_uav_type(ctx, $1, $3, true); - } -+ | so_type '<' type '>' -+ { -+ $$ = hlsl_new_stream_output_type(ctx, $1, $3); -+ } -+ | KW_RWBYTEADDRESSBUFFER -+ { -+ $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); -+ } - | KW_STRING - { - $$ = ctx->builtin_types.string; -@@ -7587,7 +8194,7 @@ variables_def_typed: - } - - variable_decl: -- any_identifier arrays colon_attribute annotations_opt -+ any_identifier arrays colon_attributes annotations_opt - { - $$ = hlsl_alloc(ctx, sizeof(*$$)); - $$->loc = @1; -@@ -7614,11 +8221,21 @@ stateblock_lhs_identifier: - if (!($$ = hlsl_strdup(ctx, "pixelshader"))) - YYABORT; - } -+ | KW_TEXTURE -+ { -+ if (!($$ = hlsl_strdup(ctx, "texture"))) -+ YYABORT; -+ } - | KW_VERTEXSHADER - { - if (!($$ = hlsl_strdup(ctx, "vertexshader"))) - YYABORT; - } -+ | KW_GEOMETRYSHADER -+ { -+ if (!($$ = hlsl_strdup(ctx, "geometryshader"))) -+ YYABORT; -+ } - - state_block_index_opt: - %empty -@@ -7666,7 +8283,7 @@ state_block: - vkd3d_free($5.args); - - $$ = $1; -- state_block_add_entry($$, entry); -+ hlsl_state_block_add_entry($$, entry); - } - | state_block any_identifier '(' func_arguments ')' ';' - { -@@ -7694,7 +8311,7 @@ state_block: - hlsl_validate_state_block_entry(ctx, entry, &@4); - - $$ = $1; -- state_block_add_entry($$, entry); -+ 
hlsl_state_block_add_entry($$, entry); - } - - state_block_list: -@@ -7906,6 +8523,14 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); - } -+ | KW_UNORM var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_UNORM, &@1); -+ } -+ | KW_SNORM var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, &@1); -+ } - | var_identifier var_modifiers - { - $$ = $2; -@@ -7931,6 +8556,7 @@ complex_initializer: - $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; -+ $$.loc = @$; - } - | '{' complex_initializer_list '}' - { -@@ -7962,6 +8588,7 @@ complex_initializer_list: - $$.args[$$.args_count++] = $3.args[i]; - hlsl_block_add_block($$.instrs, $3.instrs); - free_parse_initializer(&$3); -+ $$.loc = @$; - } - - initializer_expr: -@@ -7979,6 +8606,7 @@ initializer_expr_list: - $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; -+ $$.loc = @$; - } - | initializer_expr_list ',' initializer_expr - { -@@ -8092,8 +8720,7 @@ selection_statement: - struct hlsl_ir_node *instr; - unsigned int i; - -- if (attribute_list_has_duplicates(attributes)) -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -+ check_attribute_list_for_duplicates(ctx, attributes); - - for (i = 0; i < attributes->count; ++i) - { -@@ -8298,6 +8925,7 @@ func_arguments: - if (!($$.instrs = make_empty_block(ctx))) - YYABORT; - $$.braces = false; -+ $$.loc = @$; - } - | initializer_expr_list - -@@ -8391,6 +9019,34 @@ primary_expr: - { - $$ = $2; - } -+ -+ | KW_COMPILE any_identifier var_identifier '(' func_arguments ')' -+ { -+ if (!($$ = add_shader_compilation(ctx, $2, $3, &$5, &@1))) -+ { -+ vkd3d_free($2); -+ vkd3d_free($3); -+ YYABORT; -+ } -+ vkd3d_free($2); -+ vkd3d_free($3); -+ } -+ | KW_COMPILESHADER '(' any_identifier ',' var_identifier '(' func_arguments ')' ')' -+ { -+ if (!($$ = add_shader_compilation(ctx, $3, $5, &$7, &@1))) -+ { -+ vkd3d_free($3); -+ vkd3d_free($5); -+ 
YYABORT; -+ } -+ vkd3d_free($3); -+ vkd3d_free($5); -+ } -+ | KW_CONSTRUCTGSWITHSO '(' func_arguments ')' -+ { -+ if (!($$ = add_compile_variant(ctx, HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, &$3, &@1))) -+ YYABORT; -+ } - | var_identifier '(' func_arguments ')' - { - if (!($$ = add_call(ctx, $1, &$3, &@1))) -@@ -8400,6 +9056,25 @@ primary_expr: - } - vkd3d_free($1); - } -+ | KW_SAMPLER_STATE '{' state_block_start state_block '}' -+ { -+ struct hlsl_ir_node *sampler_state; -+ ctx->in_state_block = 0; -+ -+ if (!ctx->in_state_block && ctx->cur_scope != ctx->globals) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE, -+ "sampler_state must be in global scope or a state block."); -+ -+ if (!(sampler_state = hlsl_new_sampler_state(ctx, $4, &@1))) -+ { -+ hlsl_free_state_block($4); -+ YYABORT; -+ } -+ hlsl_free_state_block($4); -+ -+ if (!($$ = make_block(ctx, sampler_state))) -+ YYABORT; -+ } - | NEW_IDENTIFIER - { - if (ctx->in_state_block) -@@ -8416,7 +9091,11 @@ primary_expr: - else - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Identifier \"%s\" is not declared.", $1); -- YYABORT; -+ vkd3d_free($1); -+ -+ if (!($$ = make_empty_block(ctx))) -+ YYABORT; -+ $$->value = ctx->error_instr; - } - } - -@@ -8446,46 +9125,34 @@ postfix_expr: - - if (node->data_type->class == HLSL_CLASS_STRUCT) - { -- struct hlsl_type *type = node->data_type; -- const struct hlsl_struct_field *field; -- unsigned int field_idx = 0; -- -- if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) -+ if (!add_record_access_recurse(ctx, $1, $3, &@2)) - { -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); -+ destroy_block($1); - vkd3d_free($3); - YYABORT; - } -- -- field_idx = field - type->e.record.fields; -- if (!add_record_access(ctx, $1, node, field_idx, &@2)) -- { -- vkd3d_free($3); -- YYABORT; -- } -- vkd3d_free($3); -- $$ = $1; - } - else if (hlsl_is_numeric_type(node->data_type)) 
- { - struct hlsl_ir_node *swizzle; - -- if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) -+ if ((swizzle = get_swizzle(ctx, node, $3, &@3))) -+ { -+ hlsl_block_add_instr($1, swizzle); -+ } -+ else - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); -- vkd3d_free($3); -- YYABORT; -+ $1->value = ctx->error_instr; - } -- hlsl_block_add_instr($1, swizzle); -- vkd3d_free($3); -- $$ = $1; - } -- else -+ else if (node->data_type->class != HLSL_CLASS_ERROR) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); -- vkd3d_free($3); -- YYABORT; -+ $1->value = ctx->error_instr; - } -+ vkd3d_free($3); -+ $$ = $1; - } - | postfix_expr '[' expr ']' - { -@@ -8523,14 +9190,6 @@ postfix_expr: - free_parse_initializer(&$4); - YYABORT; - } -- if ($2->dimx * $2->dimy != initializer_size(&$4)) -- { -- hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in constructor, but got %u.", -- $2->dimx * $2->dimy, initializer_size(&$4)); -- free_parse_initializer(&$4); -- YYABORT; -- } - - if (!($$ = add_constructor(ctx, $2, &$4, &@2))) - { -@@ -8597,10 +9256,6 @@ unary_expr: - /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ - | '(' var_modifiers type arrays ')' unary_expr - { -- struct hlsl_type *src_type = node_from_block($6)->data_type; -- struct hlsl_type *dst_type; -- unsigned int i; -- - if ($2) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -@@ -8608,36 +9263,13 @@ unary_expr: - YYABORT; - } - -- dst_type = $3; -- for (i = 0; i < $4.count; ++i) -- { -- if ($4.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -- { -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Implicit size arrays not allowed in casts."); -- } -- dst_type = hlsl_new_array_type(ctx, dst_type, $4.sizes[i]); -- } -- -- if (!explicit_compatible_data_types(ctx, src_type, dst_type)) -- { -- struct vkd3d_string_buffer *src_string, *dst_string; -- -- src_string = hlsl_type_to_string(ctx, src_type); -- dst_string = hlsl_type_to_string(ctx, dst_type); -- if (src_string && dst_string) -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", -- src_string->buffer, dst_string->buffer); -- hlsl_release_string_buffer(ctx, src_string); -- hlsl_release_string_buffer(ctx, dst_string); -- YYABORT; -- } -- -- if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) -+ if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) - { - destroy_block($6); -+ vkd3d_free($4.sizes); - YYABORT; - } -+ vkd3d_free($4.sizes); - $$ = $6; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 92b5c71c43f..1fbf670f032 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -19,6 +19,7 @@ - */ - - #include "hlsl.h" -+#include "vkd3d_shader_private.h" - #include <stdio.h> - #include <math.h> - -@@ -276,9 +277,9 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - == base_type_get_semantic_equivalent(type2->e.numeric.type); - } - --static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -- 
struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, -- uint32_t index, bool output, const struct vkd3d_shader_location *loc) -+static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, -+ uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc) - { - struct hlsl_semantic new_semantic; - struct hlsl_ir_var *ext_var; -@@ -287,7 +288,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) - return NULL; - -- LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!ascii_strcasecmp(ext_var->name, new_name)) - { -@@ -338,14 +339,32 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - else - ext_var->is_input_semantic = 1; - ext_var->is_param = var->is_param; -+ ext_var->force_align = force_align; - list_add_before(&var->scope_entry, &ext_var->scope_entry); -- list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); -+ list_add_tail(&func->extern_vars, &ext_var->extern_entry); - - return ext_var; - } - --static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t field_modifiers) -+{ -+ field_modifiers |= modifiers; -+ -+ /* TODO: 'sample' modifier is not supported yet. */ -+ -+ /* 'nointerpolation' always takes precedence, next the same is done for -+ * 'sample', remaining modifiers are combined. 
*/ -+ if (field_modifiers & HLSL_STORAGE_NOINTERPOLATION) -+ { -+ field_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; -+ field_modifiers |= HLSL_STORAGE_NOINTERPOLATION; -+ } -+ -+ return field_modifiers; -+} -+ -+static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, -+ uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; - struct vkd3d_shader_location *loc = &lhs->node.loc; -@@ -369,14 +388,17 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) - vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); - -+ if (hlsl_type_major_size(type) > 1) -+ force_align = true; -+ - for (i = 0; i < hlsl_type_major_size(type); ++i) - { - struct hlsl_ir_node *store, *cast; - struct hlsl_ir_var *input; - struct hlsl_ir_load *load; - -- if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, -- semantic_index + i, false, loc))) -+ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, -+ modifiers, semantic, semantic_index + i, false, force_align, loc))) - return; - - if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) -@@ -408,8 +430,9 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - } - } - --static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, -+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct vkd3d_shader_location *loc = &lhs->node.loc; - struct hlsl_type *type 
= lhs->node.data_type; -@@ -425,12 +448,14 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - - for (i = 0; i < hlsl_type_element_count(type); ++i) - { -- uint32_t element_modifiers = modifiers; -+ uint32_t element_modifiers; - - if (type->class == HLSL_CLASS_ARRAY) - { - elem_semantic_index = semantic_index - + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; -+ element_modifiers = modifiers; -+ force_align = true; - } - else - { -@@ -444,17 +469,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - semantic = &field->semantic; - elem_semantic_index = semantic->index; - loc = &field->loc; -- element_modifiers |= field->storage_modifiers; -- -- /* TODO: 'sample' modifier is not supported yet */ -- -- /* 'nointerpolation' always takes precedence, next the same is done for 'sample', -- remaining modifiers are combined. */ -- if (element_modifiers & HLSL_STORAGE_NOINTERPOLATION) -- { -- element_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; -- element_modifiers |= HLSL_STORAGE_NOINTERPOLATION; -- } -+ element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); -+ force_align = (i == 0); - } - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -@@ -466,31 +482,33 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - return; - list_add_after(&c->entry, &element_load->node.entry); - -- prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); -+ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, -+ semantic, elem_semantic_index, force_align); - } - } - else - { -- prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); -+ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align); - } - } - - /* Split inputs into two variables representing the semantic and temp registers, - * and copy the 
former to the latter, so that writes to input variables work. */ --static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) -+static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_head(&block->instrs, &load->node.entry); -+ list_add_head(&func->body.instrs, &load->node.entry); - -- prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); - } - --static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_load *rhs, uint32_t modifiers, -+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct hlsl_type *type = rhs->node.data_type, *vector_type; - struct vkd3d_shader_location *loc = &rhs->node.loc; -@@ -511,24 +529,28 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - - vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - -+ if (hlsl_type_major_size(type) > 1) -+ force_align = true; -+ - for (i = 0; i < hlsl_type_major_size(type); ++i) - { - struct hlsl_ir_node *store; - struct hlsl_ir_var *output; - struct hlsl_ir_load *load; - -- if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) -+ if (!(output = add_semantic_var(ctx, func, var, vector_type, -+ modifiers, semantic, semantic_index + i, true, force_align, loc))) - 
return; - - if (type->class == HLSL_CLASS_MATRIX) - { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- hlsl_block_add_instr(block, c); -+ hlsl_block_add_instr(&func->body, c); - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) - return; -- hlsl_block_add_instr(block, &load->node); -+ hlsl_block_add_instr(&func->body, &load->node); - } - else - { -@@ -536,17 +558,18 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) - return; -- hlsl_block_add_instr(block, &load->node); -+ hlsl_block_add_instr(&func->body, &load->node); - } - - if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) - return; -- hlsl_block_add_instr(block, store); -+ hlsl_block_add_instr(&func->body, store); - } - } - --static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, -+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct vkd3d_shader_location *loc = &rhs->node.loc; - struct hlsl_type *type = rhs->node.data_type; -@@ -562,10 +585,14 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - - for (i = 0; i < hlsl_type_element_count(type); ++i) - { -+ uint32_t element_modifiers; -+ - if (type->class == HLSL_CLASS_ARRAY) - { - elem_semantic_index = semantic_index - + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; -+ element_modifiers = modifiers; -+ force_align = true; - } - else - { -@@ -576,38 +603,41 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - semantic = &field->semantic; - elem_semantic_index = semantic->index; - loc = 
&field->loc; -+ element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); -+ force_align = (i == 0); - } - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- hlsl_block_add_instr(block, c); -+ hlsl_block_add_instr(&func->body, c); - - if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) - return; -- hlsl_block_add_instr(block, &element_load->node); -+ hlsl_block_add_instr(&func->body, &element_load->node); - -- append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); -+ append_output_copy_recurse(ctx, func, element_load, element_modifiers, -+ semantic, elem_semantic_index, force_align); - } - } - else - { -- append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); -+ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align); - } - } - - /* Split outputs into two variables representing the temp and semantic - * registers, and copy the former to the latter, so that reads from output - * variables work. */ --static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) -+static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- hlsl_block_add_instr(block, &load->node); -+ hlsl_block_add_instr(&func->body, &load->node); - -- append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); - } - - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -@@ -1649,17 +1679,23 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_ARRAY: - case HLSL_CLASS_STRUCT: -- case HLSL_CLASS_CONSTANT_BUFFER: -- /* FIXME: Actually we shouldn't even get here, but we don't split -- * matrices yet. */ -+ /* We can't handle complex types here. -+ * They should have been already split anyway by earlier passes, -+ * but they may not have been deleted yet. We can't rely on DCE to -+ * solve that problem for us, since we may be called on a partial -+ * block, but DCE deletes dead stores, so it needs to be able to -+ * see the whole program. 
*/ -+ case HLSL_CLASS_ERROR: - return false; - -+ case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -@@ -4045,17 +4081,57 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return true; - } - -+static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *cond, *cond_cast, *abs, *neg; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_jump *jump; -+ struct hlsl_block block; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ return false; -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) -+ return false; -+ -+ cond = jump->condition.node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); -+ -+ hlsl_block_init(&block); -+ -+ if (!(cond_cast = hlsl_new_cast(ctx, cond, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, cond_cast); -+ -+ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, abs); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, neg); -+ -+ list_move_tail(&instr->entry, &block.instrs); -+ hlsl_src_remove(&jump->condition); -+ hlsl_src_from_node(&jump->condition, neg); -+ jump->type = HLSL_IR_JUMP_DISCARD_NEG; -+ -+ return true; -+} -+ - static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - switch (instr->type) - { - case HLSL_IR_CONSTANT: -+ case HLSL_IR_COMPILE: - case HLSL_IR_EXPR: - case HLSL_IR_INDEX: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_STRING_CONSTANT: - case HLSL_IR_SWIZZLE: -+ case HLSL_IR_SAMPLER_STATE: - if (list_empty(&instr->uses)) - { - list_remove(&instr->entry); -@@ -4088,6 +4164,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_STATEBLOCK_CONSTANT: 
- /* Stateblock constants should not appear in the shader program. */ - vkd3d_unreachable(); -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ -+ vkd3d_unreachable(); - } - - return false; -@@ -4106,7 +4185,7 @@ static void dump_function(struct rb_entry *entry, void *context) - } - } - --static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, -+static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - struct hlsl_ir_node *instr) - { - if (!deref->rel_offset.node) -@@ -4119,6 +4198,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - return true; - } - -+static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+{ -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ var->indexable = false; -+ } -+ -+ transform_derefs(ctx, mark_indexable_var, &entry_func->body); -+} -+ - static char get_regset_name(enum hlsl_regset regset) - { - switch (regset) -@@ -4135,11 +4228,11 @@ static char get_regset_name(enum hlsl_regset regset) - vkd3d_unreachable(); - } - --static void allocate_register_reservations(struct hlsl_ctx *ctx) -+static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars) - { - struct hlsl_ir_var *var; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry) - { - const struct hlsl_reg_reservation *reservation = &var->reg_reservation; - unsigned int r; -@@ -4213,6 +4306,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - case HLSL_IR_STATEBLOCK_CONSTANT: - /* Stateblock constants should not appear in the shader program. 
*/ - vkd3d_unreachable(); -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ -+ vkd3d_unreachable(); - - case HLSL_IR_STORE: - { -@@ -4337,10 +4433,22 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - case HLSL_IR_CONSTANT: - case HLSL_IR_STRING_CONSTANT: - break; -+ case HLSL_IR_COMPILE: -+ case HLSL_IR_SAMPLER_STATE: -+ /* These types are skipped as they are only relevant to effects. */ -+ break; - } - } - } - -+static void init_var_liveness(struct hlsl_ir_var *var) -+{ -+ if (var->is_uniform || var->is_input_semantic) -+ var->first_write = 1; -+ else if (var->is_output_semantic) -+ var->last_read = UINT_MAX; -+} -+ - static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct hlsl_scope *scope; -@@ -4355,16 +4463,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform || var->is_input_semantic) -- var->first_write = 1; -- else if (var->is_output_semantic) -- var->last_read = UINT_MAX; -- } -+ init_var_liveness(var); -+ -+ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) -+ init_var_liveness(var); - - compute_liveness_recurse(&entry_func->body, 0, 0); - } - -+static void mark_vars_usage(struct hlsl_ctx *ctx) -+{ -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->last_read) -+ var->is_read = true; -+ } -+ } -+} -+ - struct register_allocator - { - struct allocation -@@ -4372,6 +4493,9 @@ struct register_allocator - uint32_t reg; - unsigned int writemask; - unsigned int first_write, last_read; -+ -+ /* Two allocations with different mode can't share the same 
register. */ -+ int mode; - } *allocations; - size_t count, capacity; - -@@ -4381,10 +4505,17 @@ struct register_allocator - - /* Total number of registers allocated so far. Used to declare sm4 temp count. */ - uint32_t reg_count; -+ -+ /* Special flag so allocations that can share registers prioritize those -+ * that will result in smaller writemasks. -+ * For instance, a single-register allocation would prefer to share a register -+ * whose .xy components are already allocated (becoming .z) instead of a -+ * register whose .xyz components are already allocated (becoming .w). */ -+ bool prioritize_smaller_writemasks; - }; - - static unsigned int get_available_writemask(const struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_idx) -+ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) - { - unsigned int writemask = VKD3DSP_WRITEMASK_ALL; - size_t i; -@@ -4399,7 +4530,11 @@ static unsigned int get_available_writemask(const struct register_allocator *all - - if (allocation->reg == reg_idx - && first_write < allocation->last_read && last_read > allocation->first_write) -+ { - writemask &= ~allocation->writemask; -+ if (allocation->mode != mode) -+ writemask = 0; -+ } - - if (!writemask) - break; -@@ -4408,8 +4543,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all - return writemask; - } - --static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) -+static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, -+ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) - { - struct allocation *allocation; - -@@ -4422,6 +4557,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a - allocation->writemask = writemask; - 
allocation->first_write = first_write; - allocation->last_read = last_read; -+ allocation->mode = mode; - - allocator->reg_count = max(allocator->reg_count, reg_idx + 1); - } -@@ -4431,37 +4567,46 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a - * register, even if they don't use it completely. */ - static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, -- unsigned int component_count) -+ unsigned int component_count, int mode, bool force_align) - { -- struct hlsl_reg ret = {0}; -- unsigned int writemask; -- uint32_t reg_idx; -+ struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; -+ unsigned int required_size = force_align ? 4 : reg_size; -+ unsigned int pref; - - VKD3D_ASSERT(component_count <= reg_size); - -- for (reg_idx = 0;; ++reg_idx) -+ pref = allocator->prioritize_smaller_writemasks ? 4 : required_size; -+ for (; pref >= required_size; --pref) - { -- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); -- -- if (vkd3d_popcount(writemask) >= reg_size) -+ for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) - { -- writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); -- break; -+ unsigned int available_writemask = get_available_writemask(allocator, -+ first_write, last_read, reg_idx, mode); -+ -+ if (vkd3d_popcount(available_writemask) >= pref) -+ { -+ unsigned int writemask = hlsl_combine_writemasks(available_writemask, -+ vkd3d_write_mask_from_component_count(reg_size)); -+ -+ ret.id = reg_idx; -+ ret.writemask = hlsl_combine_writemasks(writemask, -+ vkd3d_write_mask_from_component_count(component_count)); -+ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); -+ return ret; -+ } - } - } - -- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); -- -- ret.id = reg_idx; -- 
ret.allocation_size = 1; -- ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); -- ret.allocated = true; -+ ret.id = allocator->reg_count; -+ ret.writemask = vkd3d_write_mask_from_component_count(component_count); -+ record_allocation(ctx, allocator, allocator->reg_count, -+ vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); - return ret; - } - - /* Allocate a register with writemask, while reserving reg_writemask. */ - static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) -+ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) - { - struct hlsl_reg ret = {0}; - uint32_t reg_idx; -@@ -4470,11 +4615,12 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - - for (reg_idx = 0;; ++reg_idx) - { -- if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) -+ if ((get_available_writemask(allocator, first_write, last_read, -+ reg_idx, mode) & reg_writemask) == reg_writemask) - break; - } - -- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); -+ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); - - ret.id = reg_idx; - ret.allocation_size = 1; -@@ -4483,8 +4629,8 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - return ret; - } - --static bool is_range_available(const struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) -+static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, -+ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) - { - unsigned int last_reg_mask = (1u << (reg_size % 
4)) - 1; - unsigned int writemask; -@@ -4492,18 +4638,18 @@ static bool is_range_available(const struct register_allocator *allocator, - - for (i = 0; i < (reg_size / 4); ++i) - { -- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i); -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); - if (writemask != VKD3DSP_WRITEMASK_ALL) - return false; - } -- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4)); -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); - if ((writemask & last_reg_mask) != last_reg_mask) - return false; - return true; - } - - static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, unsigned int reg_size) -+ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) - { - struct hlsl_reg ret = {0}; - uint32_t reg_idx; -@@ -4511,14 +4657,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo - - for (reg_idx = 0;; ++reg_idx) - { -- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) -+ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) - break; - } - - for (i = 0; i < reg_size / 4; ++i) -- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); -+ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); - if (reg_size % 4) -- record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, first_write, last_read); -+ record_allocation(ctx, allocator, reg_idx + (reg_size / 4), -+ (1u << (reg_size % 4)) - 1, first_write, last_read, mode); - - ret.id = reg_idx; - ret.allocation_size = align(reg_size, 4) / 4; -@@ -4534,9 +4681,9 @@ static struct hlsl_reg 
allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - /* FIXME: We could potentially pack structs or arrays more efficiently... */ - - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); - else -- return allocate_range(ctx, allocator, first_write, last_read, reg_size); -+ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); - } - - static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -4715,7 +4862,7 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, - - if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, -- instr->index, instr->last_read, reg_writemask, dst_writemask); -+ instr->index, instr->last_read, reg_writemask, dst_writemask, 0); - else - instr->reg = allocate_numeric_registers_for_type(ctx, allocator, - instr->index, instr->last_read, instr->data_type); -@@ -4816,7 +4963,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - } - } - --static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) -+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f, -+ const struct vkd3d_shader_location *loc) - { - struct hlsl_constant_defs *defs = &ctx->constant_defs; - struct hlsl_constant_register *reg; -@@ -4838,6 +4986,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, - memset(reg, 0, sizeof(*reg)); - reg->index = component_index / 4; - reg->value.f[component_index % 4] = f; -+ reg->loc = *loc; - } - - static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, -@@ -4898,7 +5047,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - } - -- record_constant(ctx, constant->reg.id * 4 + x, f); -+ 
record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); - } - - break; -@@ -4991,17 +5140,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl - - ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); -- record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); -- record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); -- record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); -- record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f, &instr->loc); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f, &instr->loc); - - ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); -- record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); -- record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); -- record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); -- record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f, &instr->loc); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f, &instr->loc); - - return; - } -@@ -5034,14 +5183,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, 
struct hlsl_ir_functi - { - if (i < bind_count) - { -- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Overlapping register() reservations on 'c%u'.", reg_idx + i); - } -- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); -+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); - } -- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); -+ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); - } - - var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; -@@ -5064,7 +5213,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - { -- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); - TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); - } -@@ -5081,9 +5230,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - * index to all (simultaneously live) variables or intermediate values. Agnostic - * as to how many registers are actually available for the current backend, and - * does not handle constants. */ --static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct register_allocator allocator = {0}; -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ -+ /* Reset variable temp register allocations. 
*/ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) -+ memset(var->regs, 0, sizeof(var->regs)); -+ } -+ } - - /* ps_1_* outputs are special and go in temp register 0. */ - if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -5092,22 +5253,53 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio - - for (i = 0; i < entry_func->parameters.count; ++i) - { -- const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; -- -+ var = entry_func->parameters.vars[i]; - if (var->is_output_semantic) - { -- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); -+ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); - break; - } - } - } - - allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); -- ctx->temp_count = allocator.reg_count; - vkd3d_free(allocator.allocations); -+ -+ return allocator.reg_count; -+} -+ -+enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) -+{ -+ unsigned int i; -+ -+ static const struct -+ { -+ unsigned int modifiers; -+ enum vkd3d_shader_interpolation_mode mode; -+ } -+ modes[] = -+ { -+ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID}, -+ {HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE}, -+ {HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID}, -+ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID}, -+ }; -+ -+ if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) -+ || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT) -+ return VKD3DSIM_CONSTANT; -+ -+ for (i = 0; i < ARRAY_SIZE(modes); ++i) -+ { -+ if ((storage_modifiers & 
modes[i].modifiers) == modes[i].modifiers) -+ return modes[i].mode; -+ } -+ -+ return VKD3DSIM_LINEAR; - } - --static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) -+static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -+ struct register_allocator *allocator, bool output, bool optimize, bool is_patch_constant_func) - { - static const char *const shader_names[] = - { -@@ -5120,27 +5312,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - }; - - enum vkd3d_shader_register_type type; -+ struct vkd3d_shader_version version; - uint32_t reg; - bool builtin; - - VKD3D_ASSERT(var->semantic.name); - -- if (ctx->profile->major_version < 4) -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ -+ if (version.major < 4) - { -- struct vkd3d_shader_version version; -- D3DDECLUSAGE usage; -+ enum vkd3d_decl_usage usage; - uint32_t usage_idx; - - /* ps_1_* outputs are special and go in temp register 0. 
*/ -- if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL) - return; - -- version.major = ctx->profile->major_version; -- version.minor = ctx->profile->minor_version; -- version.type = ctx->profile->type; -- builtin = hlsl_sm1_register_from_semantic(&version, -+ builtin = sm1_register_from_semantic_name(&version, - var->semantic.name, var->semantic.index, output, &type, ®); -- if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) -+ if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -5152,50 +5345,72 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - } - else - { -- D3D_NAME usage; -+ enum vkd3d_shader_sysval_semantic semantic; - bool has_idx; - -- if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage)) -+ if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, -+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); - return; - } -- if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx))) -+ -+ if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) - reg = has_idx ? var->semantic.index : 0; -+ -+ if (semantic == VKD3D_SHADER_SV_TESS_FACTOR_TRIINT) -+ { -+ /* While SV_InsideTessFactor can be declared as 'float' for "tri" -+ * domains, it is allocated as if it was 'float[1]'. 
*/ -+ var->force_align = true; -+ } - } - - if (builtin) - { -- TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], -+ TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type], - output ? "output" : "input", var->semantic.name, var->semantic.index, type, reg); - } - else - { -- var->regs[HLSL_REGSET_NUMERIC].allocated = true; -- var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; -- var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; -- var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; -- TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', -- var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -+ int mode = (ctx->profile->major_version < 4) -+ ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -+ unsigned int reg_size = optimize ? var->data_type->dimx : 4; -+ -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, -+ UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); -+ -+ TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 
'o' : 'v', -+ var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); - } - } - --static void allocate_semantic_registers(struct hlsl_ctx *ctx) -+static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- unsigned int input_counter = 0, output_counter = 0; -+ struct register_allocator input_allocator = {0}, output_allocator = {0}; -+ bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; -+ bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; -+ bool is_patch_constant_func = entry_func == ctx->patch_constant_func; - struct hlsl_ir_var *var; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ input_allocator.prioritize_smaller_writemasks = true; -+ output_allocator.prioritize_smaller_writemasks = true; -+ -+ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_input_semantic) -- allocate_semantic_register(ctx, var, &input_counter, false); -+ allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader, is_patch_constant_func); - if (var->is_output_semantic) -- allocate_semantic_register(ctx, var, &output_counter, true); -+ allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader, is_patch_constant_func); - } -+ -+ vkd3d_free(input_allocator.allocations); -+ vkd3d_free(output_allocator.allocations); - } - --static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) -+static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, -+ uint32_t space, uint32_t index, bool allocated_only) - { - const struct hlsl_buffer *buffer; - -@@ -5203,7 +5418,12 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 - { - if (buffer->reservation.reg_type == 'b' - && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) -+ { -+ if (allocated_only && 
!buffer->reg.allocated) -+ continue; -+ - return buffer; -+ } - } - return NULL; - } -@@ -5260,7 +5480,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - - TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); - buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); -- if (var->last_read) -+ if (var->is_read) - buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); - } - -@@ -5386,8 +5606,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - if (reservation->reg_type == 'b') - { -- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, -- reservation->reg_space, reservation->reg_index); -+ const struct hlsl_buffer *allocated_buffer = get_reserved_buffer(ctx, -+ reservation->reg_space, reservation->reg_index, true); - unsigned int max_index = get_max_cbuffer_reg_index(ctx); - - if (buffer->reservation.reg_index > max_index) -@@ -5395,14 +5615,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - "Buffer reservation cb%u exceeds target's maximum (cb%u).", - buffer->reservation.reg_index, max_index); - -- if (reserved_buffer && reserved_buffer != buffer) -+ if (allocated_buffer && allocated_buffer != buffer) - { - hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple buffers bound to space %u, index %u.", - reservation->reg_space, reservation->reg_index); -- hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, -+ hlsl_note(ctx, &allocated_buffer->loc, VKD3D_SHADER_LOG_ERROR, - "Buffer %s is already bound to space %u, index %u.", -- reserved_buffer->name, reservation->reg_space, reservation->reg_index); -+ allocated_buffer->name, reservation->reg_space, reservation->reg_index); - } - - buffer->reg.space = reservation->reg_space; -@@ -5419,12 +5639,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - else if (!reservation->reg_type) - { - unsigned int max_index = get_max_cbuffer_reg_index(ctx); -- 
while (get_reserved_buffer(ctx, 0, index)) -+ while (get_reserved_buffer(ctx, 0, index, false)) - ++index; - - if (index > max_index) - hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -- "Too many buffers allocated, target's maximum is %u.", max_index); -+ "Too many buffers reserved, target's maximum is %u.", max_index); - - buffer->reg.space = 0; - buffer->reg.index = index; -@@ -5491,15 +5711,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - return NULL; - } - --static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) -+static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset) - { - char regset_name = get_regset_name(regset); - uint32_t min_index = 0, id = 0; - struct hlsl_ir_var *var; - -- if (regset == HLSL_REGSET_UAVS) -+ if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - { -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") - || !ascii_strcasecmp(var->semantic.name, "sv_target"))) -@@ -5786,6 +6006,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - return ret; - } - -+static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i) -+{ -+ const struct hlsl_ir_node *instr = attr->args[i].node; -+ const struct hlsl_type *type = instr->data_type; -+ -+ if (type->class != HLSL_CLASS_STRING) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, type))) -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for the argument %u of [%s]: expected string, but got %s.", -+ i, attr->name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return 
NULL; -+ } -+ -+ return hlsl_ir_string_constant(instr)->string; -+} -+ - static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) - { - unsigned int i; -@@ -5834,207 +6074,2961 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - } - } - --static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) -+static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) - { -- struct hlsl_ir_node *instr, *next; -- struct hlsl_block block; -- struct list *start; -+ const char *value; - -- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry) -+ if (attr->args_count != 1) - { -- if (instr->type == HLSL_IR_IF) -- { -- struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected 1 parameter for [domain] attribute, but got %u.", attr->args_count); -+ return; -+ } - -- remove_unreachable_code(ctx, &iff->then_block); -- remove_unreachable_code(ctx, &iff->else_block); -- } -- else if (instr->type == HLSL_IR_LOOP) -- { -- struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -+ if (!(value = get_string_argument_value(ctx, attr, 0))) -+ return; - -- remove_unreachable_code(ctx, &loop->body); -- } -- else if (instr->type == HLSL_IR_SWITCH) -- { -- struct hlsl_ir_switch *s = hlsl_ir_switch(instr); -- struct hlsl_ir_switch_case *c; -+ if (!strcmp(value, "isoline")) -+ ctx->domain = VKD3D_TESSELLATOR_DOMAIN_LINE; -+ else if (!strcmp(value, "tri")) -+ ctx->domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; -+ else if (!strcmp(value, "quad")) -+ ctx->domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; -+ else -+ hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN, -+ "Invalid tessellator domain \"%s\": expected \"isoline\", \"tri\", or \"quad\".", -+ value); -+} - -- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -- { -- 
remove_unreachable_code(ctx, &c->body); -- } -- } -- } -+static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -+{ -+ const struct hlsl_ir_node *instr; -+ const struct hlsl_type *type; -+ const struct hlsl_ir_constant *constant; - -- /* Remove instructions past unconditional jumps. */ -- LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry) -+ if (attr->args_count != 1) - { -- struct hlsl_ir_jump *jump; -- -- if (instr->type != HLSL_IR_JUMP) -- continue; -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected 1 parameter for [outputcontrolpoints] attribute, but got %u.", attr->args_count); -+ return; -+ } - -- jump = hlsl_ir_jump(instr); -- if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE) -- continue; -+ instr = attr->args[0].node; -+ type = instr->data_type; - -- if (!(start = list_next(&body->instrs, &instr->entry))) -- break; -+ if (type->class != HLSL_CLASS_SCALAR -+ || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) -+ { -+ struct vkd3d_string_buffer *string; - -- hlsl_block_init(&block); -- list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs)); -- hlsl_block_cleanup(&block); -+ if ((string = hlsl_type_to_string(ctx, type))) -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.", -+ string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return; -+ } - -- break; -+ if (instr->type != HLSL_IR_CONSTANT) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer."); -+ return; - } -+ constant = hlsl_ir_constant(instr); -+ -+ if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0) -+ || constant->value.u[0].u > 32) -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, -+ "Output control 
point count must be between 0 and 32."); -+ -+ ctx->output_control_point_count = constant->value.u[0].u; - } - --void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) -+static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) - { -- bool progress; -- -- lower_ir(ctx, lower_matrix_swizzles, body); -- lower_ir(ctx, lower_index_loads, body); -+ const char *value; - -- lower_ir(ctx, lower_broadcasts, body); -- while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -- do -+ if (attr->args_count != 1) - { -- progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -- progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected 1 parameter for [outputtopology] attribute, but got %u.", attr->args_count); -+ return; - } -- while (progress); -- hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); - -- lower_ir(ctx, lower_narrowing_casts, body); -- lower_ir(ctx, lower_int_dot, body); -- lower_ir(ctx, lower_int_division, body); -- lower_ir(ctx, lower_int_modulus, body); -- lower_ir(ctx, lower_int_abs, body); -- lower_ir(ctx, lower_casts_to_bool, body); -- lower_ir(ctx, lower_float_modulus, body); -- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); -+ if (!(value = get_string_argument_value(ctx, attr, 0))) -+ return; - -- do -- { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, body); -- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -- progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -- progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); -- } while (progress); -+ if 
(!strcmp(value, "point")) -+ ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT; -+ else if (!strcmp(value, "line")) -+ ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE; -+ else if (!strcmp(value, "triangle_cw")) -+ ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW; -+ else if (!strcmp(value, "triangle_ccw")) -+ ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW; -+ else -+ hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, -+ "Invalid tessellator output topology \"%s\": " -+ "expected \"point\", \"line\", \"triangle_cw\", or \"triangle_ccw\".", value); - } - --static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, -- struct vsir_program *program, bool output, struct hlsl_ir_var *var) -+static void parse_partitioning_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) - { -- enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -- enum vkd3d_shader_register_type type; -- struct shader_signature *signature; -- struct signature_element *element; -- unsigned int register_index, mask; -+ const char *value; - -- if ((!output && !var->last_read) || (output && !var->first_write)) -+ if (attr->args_count != 1) -+ { -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected 1 parameter for [partitioning] attribute, but got %u.", attr->args_count); -+ return; -+ } -+ -+ if (!(value = get_string_argument_value(ctx, attr, 0))) -+ return; -+ -+ if (!strcmp(value, "integer")) -+ ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER; -+ else if (!strcmp(value, "pow2")) -+ ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2; -+ else if (!strcmp(value, "fractional_even")) -+ ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; -+ else if (!strcmp(value, "fractional_odd")) -+ ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; -+ else -+ 
hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING, -+ "Invalid tessellator partitioning \"%s\": " -+ "expected \"integer\", \"pow2\", \"fractional_even\", or \"fractional_odd\".", value); -+} -+ -+static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -+{ -+ const char *name; -+ struct hlsl_ir_function *func; -+ struct hlsl_ir_function_decl *decl; -+ -+ if (attr->args_count != 1) -+ { -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected 1 parameter for [patchconstantfunc] attribute, but got %u.", attr->args_count); -+ return; -+ } -+ -+ if (!(name = get_string_argument_value(ctx, attr, 0))) -+ return; -+ -+ ctx->patch_constant_func = NULL; -+ if ((func = hlsl_get_function(ctx, name))) -+ { -+ /* Pick the last overload with a body. */ -+ LIST_FOR_EACH_ENTRY_REV(decl, &func->overloads, struct hlsl_ir_function_decl, entry) -+ { -+ if (decl->has_body) -+ { -+ ctx->patch_constant_func = decl; -+ break; -+ } -+ } -+ } -+ -+ if (!ctx->patch_constant_func) -+ hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, -+ "Patch constant function \"%s\" is not defined.", name); -+} -+ -+static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+{ -+ const struct hlsl_profile_info *profile = ctx->profile; -+ unsigned int i; -+ -+ for (i = 0; i < entry_func->attr_count; ++i) -+ { -+ const struct hlsl_attribute *attr = entry_func->attrs[i]; -+ -+ if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE) -+ parse_numthreads_attribute(ctx, attr); -+ else if (!strcmp(attr->name, "domain") -+ && (profile->type == VKD3D_SHADER_TYPE_HULL || profile->type == VKD3D_SHADER_TYPE_DOMAIN)) -+ parse_domain_attribute(ctx, attr); -+ else if (!strcmp(attr->name, "outputcontrolpoints") && profile->type == VKD3D_SHADER_TYPE_HULL) -+ parse_outputcontrolpoints_attribute(ctx, attr); -+ else if 
(!strcmp(attr->name, "outputtopology") && profile->type == VKD3D_SHADER_TYPE_HULL) -+ parse_outputtopology_attribute(ctx, attr); -+ else if (!strcmp(attr->name, "partitioning") && profile->type == VKD3D_SHADER_TYPE_HULL) -+ parse_partitioning_attribute(ctx, attr); -+ else if (!strcmp(attr->name, "patchconstantfunc") && profile->type == VKD3D_SHADER_TYPE_HULL) -+ parse_patchconstantfunc_attribute(ctx, attr); -+ else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ entry_func->early_depth_test = true; -+ else -+ hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, -+ "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); -+ } -+} -+ -+static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func) -+{ -+ if (ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) -+ { -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); -+ } -+ -+ if (ctx->output_control_point_count == UINT_MAX) -+ { -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [outputcontrolpoints] attribute.", entry_func->func->name); -+ } -+ -+ if (!ctx->output_primitive) -+ { -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [outputtopology] attribute.", entry_func->func->name); -+ } -+ -+ if (!ctx->partitioning) -+ { -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [partitioning] attribute.", entry_func->func->name); -+ } -+ -+ if (!ctx->patch_constant_func) -+ { -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [patchconstantfunc] attribute.", entry_func->func->name); -+ } -+ else if 
(ctx->patch_constant_func == entry_func) -+ { -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL, -+ "Patch constant function cannot be the entry point function."); -+ /* Native returns E_NOTIMPL instead of E_FAIL here. */ -+ ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; -+ return; -+ } -+ -+ switch (ctx->domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW -+ || ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, -+ "Triangle output topologies are not available for isoline domains."); -+ break; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, -+ "Line output topologies are not available for triangle domains."); -+ break; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, -+ "Line output topologies are not available for quad domains."); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) -+{ -+ struct hlsl_ir_node *instr, *next; -+ struct hlsl_block block; -+ struct list *start; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ -+ remove_unreachable_code(ctx, &iff->then_block); -+ remove_unreachable_code(ctx, &iff->else_block); -+ } -+ else if (instr->type == HLSL_IR_LOOP) -+ { -+ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -+ -+ remove_unreachable_code(ctx, &loop->body); -+ } -+ else if (instr->type == HLSL_IR_SWITCH) -+ { -+ struct hlsl_ir_switch *s = 
hlsl_ir_switch(instr); -+ struct hlsl_ir_switch_case *c; -+ -+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -+ { -+ remove_unreachable_code(ctx, &c->body); -+ } -+ } -+ } -+ -+ /* Remove instructions past unconditional jumps. */ -+ LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry) -+ { -+ struct hlsl_ir_jump *jump; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ continue; -+ -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE) -+ continue; -+ -+ if (!(start = list_next(&body->instrs, &instr->entry))) -+ break; -+ -+ hlsl_block_init(&block); -+ list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs)); -+ hlsl_block_cleanup(&block); -+ -+ break; -+ } -+} -+ -+void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) -+{ -+ lower_ir(ctx, lower_index_loads, body); -+} -+ -+void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) -+{ -+ bool progress; -+ -+ lower_ir(ctx, lower_matrix_swizzles, body); -+ -+ lower_ir(ctx, lower_broadcasts, body); -+ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -+ do -+ { -+ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -+ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); -+ } -+ while (progress); -+ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); -+ -+ lower_ir(ctx, lower_narrowing_casts, body); -+ lower_ir(ctx, lower_int_dot, body); -+ lower_ir(ctx, lower_int_division, body); -+ lower_ir(ctx, lower_int_modulus, body); -+ lower_ir(ctx, lower_int_abs, body); -+ lower_ir(ctx, lower_casts_to_bool, body); -+ lower_ir(ctx, lower_float_modulus, body); -+ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -+ progress |= hlsl_transform_ir(ctx, 
hlsl_fold_constant_swizzles, body, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, body); -+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); -+ } while (progress); -+} -+ -+static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) -+{ -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ enum vkd3d_shader_component_type component_type; -+ unsigned int register_index, mask, use_mask; -+ const char *name = var->semantic.name; -+ enum vkd3d_shader_register_type type; -+ struct signature_element *element; -+ -+ if (hlsl_version_ge(ctx, 4, 0)) -+ { -+ struct vkd3d_string_buffer *string; -+ bool has_idx, ret; -+ -+ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, -+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); -+ VKD3D_ASSERT(ret); -+ if (sysval == ~0u) -+ return; -+ -+ if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) -+ { -+ register_index = has_idx ? var->semantic.index : ~0u; -+ mask = (1u << var->data_type->dimx) - 1; -+ } -+ else -+ { -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ mask = var->regs[HLSL_REGSET_NUMERIC].writemask; -+ } -+ -+ use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ -+ -+ switch (var->data_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ break; -+ -+ case HLSL_TYPE_INT: -+ component_type = VKD3D_SHADER_COMPONENT_INT; -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ component_type = VKD3D_SHADER_COMPONENT_UINT; -+ break; -+ -+ default: -+ if ((string = hlsl_type_to_string(ctx, var->data_type))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Invalid data type %s for semantic variable %s.", string->buffer, var->name); -+ hlsl_release_string_buffer(ctx, string); -+ component_type = VKD3D_SHADER_COMPONENT_VOID; -+ break; -+ } -+ -+ if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) -+ name = "SV_Target"; -+ else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) -+ name ="SV_Depth"; -+ else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) -+ name = "SV_Position"; -+ } -+ else -+ { -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ -+ if (!sm1_register_from_semantic_name(&program->shader_version, -+ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -+ { -+ enum vkd3d_decl_usage usage; -+ unsigned int usage_idx; -+ bool ret; -+ -+ register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ -+ ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx); -+ VKD3D_ASSERT(ret); -+ /* With the exception of vertex POSITION output, none of these are -+ * system values. Pixel POSITION input is not equivalent to -+ * SV_Position; the closer equivalent is VPOS, which is not declared -+ * as a semantic. 
*/ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ } -+ -+ mask = (1 << var->data_type->dimx) - 1; -+ -+ if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output -+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ { -+ if (var->data_type->dimx > 1) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "PSIZE output must have only 1 component in this shader model."); -+ /* For some reason the writemask has all components set. */ -+ mask = VKD3DSP_WRITEMASK_ALL; -+ } -+ if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 -+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "FOG output must have only 1 component in this shader model."); -+ -+ use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element = &signature->elements[signature->element_count++]; -+ memset(element, 0, sizeof(*element)); -+ -+ if (!(element->semantic_name = vkd3d_strdup(name))) -+ { -+ --signature->element_count; -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element->semantic_index = var->semantic.index; -+ element->sysval_semantic = sysval; -+ element->component_type = component_type; -+ element->register_index = register_index; -+ element->target_location = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = use_mask; -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -+ element->interpolation_mode = VKD3DSIM_LINEAR; -+} -+ -+static void generate_vsir_signature(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_function_decl *func) -+{ -+ bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; -+ bool is_patch_constant_func = func == ctx->patch_constant_func; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_input_semantic) -+ { -+ if (is_patch_constant_func) -+ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, true, var); -+ else if (is_domain) -+ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, false, var); -+ else -+ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, false, var); -+ } -+ if (var->is_output_semantic) -+ { -+ if (is_patch_constant_func) -+ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, true, var); -+ else -+ generate_vsir_signature_entry(ctx, 
program, &program->output_signature, true, false, var); -+ } -+ } -+} -+ -+static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) -+{ -+ if (hlsl_version_lt(ctx, 4, 0)) -+ return VKD3D_DATA_FLOAT; -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ return vsir_data_type_from_hlsl_type(ctx, type->e.array.type); -+ if (type->class == HLSL_CLASS_STRUCT) -+ return VKD3D_DATA_MIXED; -+ if (type->class <= HLSL_CLASS_LAST_NUMERIC) -+ { -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ return VKD3D_DATA_DOUBLE; -+ case HLSL_TYPE_FLOAT: -+ return VKD3D_DATA_FLOAT; -+ case HLSL_TYPE_HALF: -+ return VKD3D_DATA_HALF; -+ case HLSL_TYPE_INT: -+ return VKD3D_DATA_INT; -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ return VKD3D_DATA_UINT; -+ } -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx, -+ const struct hlsl_ir_node *instr) -+{ -+ return vsir_data_type_from_hlsl_type(ctx, instr->data_type); -+} -+ -+static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) -+{ -+ uint32_t swizzle; -+ -+ swizzle = hlsl_swizzle_from_writemask(src_writemask); -+ swizzle = hlsl_map_swizzle(swizzle, dst_writemask); -+ swizzle = vsir_swizzle_from_hlsl(swizzle); -+ return swizzle; -+} -+ -+static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_block *block) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i, x; -+ -+ for (i = 0; i < ctx->constant_defs.count; ++i) -+ { -+ const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; -+ -+ if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ 
return; -+ } -+ -+ ins = &instructions->elements[instructions->count]; -+ if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VKD3DSIH_DEF, 1, 1)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ ++instructions->count; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].reg.idx[0].offset = constant_reg->index; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; -+ -+ src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ src_param->reg.type = VKD3DSPR_IMMCONST; -+ src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -+ src_param->reg.non_uniform = false; -+ src_param->reg.data_type = VKD3D_DATA_FLOAT; -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ for (x = 0; x < 4; ++x) -+ src_param->reg.u.immconst_f32[x] = constant_reg->value.f[x]; -+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ } -+} -+ -+static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_block *block) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ enum vkd3d_shader_resource_type resource_type; -+ struct vkd3d_shader_register_range *range; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_semantic *semantic; -+ struct vkd3d_shader_instruction *ins; -+ enum hlsl_sampler_dim sampler_dim; -+ struct hlsl_ir_var *var; -+ unsigned int i, count; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) -+ continue; -+ -+ count = var->bind_count[HLSL_REGSET_SAMPLERS]; -+ for (i = 0; i < count; ++i) -+ { -+ if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -+ { -+ sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; -+ -+ switch (sampler_dim) -+ { -+ case 
HLSL_SAMPLER_DIM_2D: -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ break; -+ -+ case HLSL_SAMPLER_DIM_CUBE: -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; -+ break; -+ -+ case HLSL_SAMPLER_DIM_3D: -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_3D; -+ break; -+ -+ case HLSL_SAMPLER_DIM_GENERIC: -+ /* These can appear in sm4-style combined sample instructions. */ -+ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); -+ continue; -+ -+ default: -+ vkd3d_unreachable(); -+ break; -+ } -+ -+ if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ ins = &instructions->elements[instructions->count]; -+ if (!vsir_instruction_init_with_params(program, ins, &var->loc, VKD3DSIH_DCL, 0, 0)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ ++instructions->count; -+ -+ semantic = &ins->declaration.semantic; -+ semantic->resource_type = resource_type; -+ -+ dst_param = &semantic->resource.reg; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.dimension = VSIR_DIMENSION_NONE; -+ dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i; -+ dst_param->write_mask = 0; -+ range = &semantic->resource.range; -+ range->space = 0; -+ range->first = range->last = dst_param->reg.idx[0].offset; -+ } -+ } -+ } -+} -+ -+static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( -+ struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode, -+ unsigned int dst_count, unsigned int src_count) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return NULL; -+ } -+ ins = 
&instructions->elements[instructions->count]; -+ if (!vsir_instruction_init_with_params(program, ins, loc, opcode, dst_count, src_count)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return NULL; -+ } -+ ++instructions->count; -+ return ins; -+} -+ -+static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src, -+ struct hlsl_ctx *ctx, const struct hlsl_constant_value *value, -+ enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask) -+{ -+ unsigned int i, j; -+ -+ vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0); -+ if (width == 1) -+ { -+ src->reg.u.immconst_u32[0] = value->u[0].u; -+ return; -+ } -+ -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ for (i = 0, j = 0; i < 4; ++i) -+ { -+ if ((map_writemask & (1u << i)) && (j < width)) -+ src->reg.u.immconst_u32[i] = value->u[j++].u; -+ else -+ src->reg.u.immconst_u32[i] = 0; -+ } -+} -+ -+static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, -+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) -+{ -+ struct hlsl_ir_constant *constant; -+ -+ if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT) -+ { -+ /* In SM4 constants are inlined */ -+ constant = hlsl_ir_constant(instr); -+ vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, -+ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); -+ } -+ else -+ { -+ vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); -+ src->reg.idx[0].offset = instr->reg.id; -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); -+ } -+} -+ -+static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) -+{ -+ const struct hlsl_ir_var *var = deref->var; -+ unsigned int 
offset_const_deref; -+ -+ reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; -+ reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ -+ if (!var->indexable) -+ { -+ offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx[0].offset += offset_const_deref / 4; -+ reg->idx_count = 1; -+ } -+ else -+ { -+ offset_const_deref = deref->const_offset; -+ reg->idx[1].offset = offset_const_deref / 4; -+ reg->idx_count = 2; -+ -+ if (deref->rel_offset.node) -+ { -+ struct vkd3d_shader_src_param *idx_src; -+ -+ if (!(idx_src = vsir_program_get_src_params(program, 1))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return false; -+ } -+ memset(idx_src, 0, sizeof(*idx_src)); -+ reg->idx[1].rel_addr = idx_src; -+ -+ vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); -+ } -+ } -+ -+ *writemask = 0xf & (0xf << (offset_const_deref % 4)); -+ if (var->regs[HLSL_REGSET_NUMERIC].writemask) -+ *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); -+ return true; -+} -+ -+static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); -+ const struct hlsl_ir_var *var = deref->var; -+ -+ if (var->is_uniform) -+ { -+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -+ -+ if (regset == HLSL_REGSET_TEXTURES) -+ { -+ reg->type = VKD3DSPR_RESOURCE; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -+ reg->idx_count = 2; 
-+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (regset == HLSL_REGSET_UAVS) -+ { -+ reg->type = VKD3DSPR_UAV; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (regset == HLSL_REGSET_SAMPLERS) -+ { -+ reg->type = VKD3DSPR_SAMPLER; -+ reg->dimension = VSIR_DIMENSION_NONE; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -+ -+ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); -+ reg->type = VKD3DSPR_CONSTBUFFER; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -+ reg->idx[2].offset = offset / 4; -+ reg->idx_count = 3; -+ } -+ else -+ { -+ reg->idx[0].offset = var->buffer->reg.index; -+ 
reg->idx[1].offset = offset / 4; -+ reg->idx_count = 2; -+ } -+ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); -+ } -+ } -+ else if (var->is_input_semantic) -+ { -+ bool has_idx; -+ -+ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ -+ if (has_idx) -+ { -+ reg->idx[0].offset = var->semantic.index + offset / 4; -+ reg->idx_count = 1; -+ } -+ -+ if (shader_sm4_is_scalar_register(reg)) -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ else -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ VKD3D_ASSERT(hlsl_reg.allocated); -+ -+ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ reg->type = VKD3DSPR_PATCHCONST; -+ else -+ reg->type = VKD3DSPR_INPUT; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+ } -+ else if (var->is_output_semantic) -+ { -+ bool has_idx; -+ -+ if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ -+ if (has_idx) -+ { -+ reg->idx[0].offset = var->semantic.index + offset / 4; -+ reg->idx_count = 1; -+ } -+ -+ if (shader_sm4_is_scalar_register(reg)) -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ else -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ VKD3D_ASSERT(hlsl_reg.allocated); -+ reg->type = VKD3DSPR_OUTPUT; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+ } -+ else -+ { -+ return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, 
deref); -+ } -+ return true; -+} -+ -+static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, -+ unsigned int dst_writemask, const struct vkd3d_shader_location *loc) -+{ -+ uint32_t writemask; -+ -+ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) -+ return false; -+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); -+ return true; -+} -+ -+static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, -+ const struct vkd3d_shader_location *loc, unsigned int writemask) -+{ -+ uint32_t reg_writemask; -+ -+ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, ®_writemask, deref)) -+ return false; -+ dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); -+ return true; -+} -+ -+static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, -+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) -+{ -+ VKD3D_ASSERT(instr->reg.allocated); -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); -+ dst->reg.idx[0].offset = instr->reg.id; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst->write_mask = instr->reg.writemask; -+} -+ -+static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_constant *constant) -+{ -+ struct hlsl_ir_node *instr = &constant->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ VKD3D_ASSERT(constant->reg.allocated); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return; -+ -+ src_param = 
&ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = constant->reg.id; -+ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = instr->reg.writemask; -+} -+ -+static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) -+ return; -+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ src_param = &ins->src[0]; -+ vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+} -+ -+/* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ -+static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, -+ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) -+{ -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i, src_count = 0; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) -+ { -+ if (expr->operands[i].node) -+ src_count = i + 1; -+ } -+ VKD3D_ASSERT(!src_mod || src_count == 1); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ dst_param->modifiers = dst_mod; -+ -+ for (i = 0; i < src_count; ++i) -+ { -+ struct hlsl_ir_node *operand = expr->operands[i].node; -+ -+ src_param = &ins->src[i]; -+ vsir_src_from_hlsl_node(src_param, ctx, operand, -+ map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); -+ src_param->modifiers = src_mod; -+ } -+} -+ -+/* Translate ops that have 1 src and need one instruction for each component in -+ * the d3dbc backend. 
*/ -+static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode) -+{ -+ struct hlsl_ir_node *operand = expr->operands[0].node; -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ uint32_t src_swizzle; -+ unsigned int i, c; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ VKD3D_ASSERT(operand); -+ -+ src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); -+ for (i = 0; i < 4; ++i) -+ { -+ if (instr->reg.writemask & (1u << i)) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = 1u << i; -+ -+ src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = operand->reg.id; -+ c = vsir_swizzle_get_component(src_swizzle, i); -+ src_param->swizzle = vsir_swizzle_from_writemask(1u << c); -+ } -+ } -+} -+ -+static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_ir_expr *expr) -+{ -+ struct hlsl_ir_node *operand = expr->operands[0].node; -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int src_count = 0; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ src_count = (ctx->profile->major_version < 3) ? 
3 : 1; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = instr->reg.writemask; -+ -+ src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = operand->reg.id; -+ src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ -+ if (ctx->profile->major_version < 3) -+ { -+ src_param = &ins->src[1]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; -+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ src_param = &ins->src[1]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; -+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ } -+} -+ -+static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ const struct hlsl_type *src_type, *dst_type; -+ const struct hlsl_ir_node *arg1, *instr; -+ -+ arg1 = expr->operands[0].node; -+ src_type = arg1->data_type; -+ instr = &expr->node; -+ dst_type = instr->data_type; -+ -+ /* Narrowing casts were already lowered. 
*/ -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -+ -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ /* Integrals are internally represented as floats, so no change is necessary.*/ -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (ctx->double_as_float_alias) -+ { -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ } -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "The 'double' type is not supported for the %s profile.", ctx->profile->name); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ switch(src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not -+ * reach this case unless we are missing something. 
*/ -+ hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ if (ctx->double_as_float_alias) -+ { -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ } -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "The 'double' type is not supported for the %s profile.", ctx->profile->name); -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &instr->loc, "SM1 cast to double."); -+ break; -+ } -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ /* Casts to bool should have already been lowered. */ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", -+ debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); -+ break; -+ } -+ -+ return false; -+} -+ -+static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_ir_expr *expr) -+{ -+ struct hlsl_ir_node *instr = &expr->node; -+ -+ if (expr->op != HLSL_OP1_REINTERPRET && expr->op != HLSL_OP1_CAST -+ && instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) -+ { -+ /* These need to be lowered. 
*/ -+ hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); -+ return false; -+ } -+ -+ switch (expr->op) -+ { -+ case HLSL_OP1_ABS: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); -+ break; -+ -+ case HLSL_OP1_CAST: -+ return sm1_generate_vsir_instr_expr_cast(ctx, program, expr); -+ -+ case HLSL_OP1_COS_REDUCED: -+ VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0); -+ sm1_generate_vsir_instr_expr_sincos(ctx, program, expr); -+ break; -+ -+ case HLSL_OP1_DSX: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); -+ break; -+ -+ case HLSL_OP1_DSY: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); -+ break; -+ -+ case HLSL_OP1_EXP2: -+ sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_EXP); -+ break; -+ -+ case HLSL_OP1_LOG2: -+ sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_LOG); -+ break; -+ -+ case HLSL_OP1_NEG: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); -+ break; -+ -+ case HLSL_OP1_RCP: -+ sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP); -+ break; -+ -+ case HLSL_OP1_REINTERPRET: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ break; -+ -+ case HLSL_OP1_RSQ: -+ sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RSQ); -+ break; -+ -+ case HLSL_OP1_SAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); -+ break; -+ -+ case HLSL_OP1_SIN_REDUCED: -+ VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_1); -+ sm1_generate_vsir_instr_expr_sincos(ctx, program, expr); -+ break; -+ -+ case HLSL_OP2_ADD: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); -+ break; -+ -+ case HLSL_OP2_DOT: -+ switch 
(expr->operands[0].node->data_type->dimx) -+ { -+ case 3: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); -+ break; -+ -+ case 4: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ return false; -+ } -+ break; -+ -+ case HLSL_OP2_MAX: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); -+ break; -+ -+ case HLSL_OP2_MIN: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); -+ break; -+ -+ case HLSL_OP2_MUL: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); -+ break; -+ -+ case HLSL_OP1_FRACT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); -+ break; -+ -+ case HLSL_OP2_LOGIC_AND: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); -+ break; -+ -+ case HLSL_OP2_LOGIC_OR: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); -+ break; -+ -+ case HLSL_OP2_SLT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); -+ break; -+ -+ case HLSL_OP3_CMP: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); -+ break; -+ -+ case HLSL_OP3_DP2ADD: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); -+ break; -+ -+ case HLSL_OP3_MAD: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, -+ struct vkd3d_shader_dst_param *dst_param, struct hlsl_deref *deref, -+ const struct vkd3d_shader_location *loc, unsigned int writemask) 
-+{ -+ enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; -+ struct vkd3d_shader_version version; -+ uint32_t register_index; -+ struct hlsl_reg reg; -+ -+ reg = hlsl_reg_from_deref(ctx, deref); -+ register_index = reg.id; -+ writemask = hlsl_combine_writemasks(reg.writemask, writemask); -+ -+ if (deref->var->is_output_semantic) -+ { -+ const char *semantic_name = deref->var->semantic.name; -+ -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ -+ if (version.type == VKD3D_SHADER_TYPE_PIXEL && version.major == 1) -+ { -+ type = VKD3DSPR_TEMP; -+ register_index = 0; -+ } -+ else if (!sm1_register_from_semantic_name(&version, semantic_name, -+ deref->var->semantic.index, true, &type, ®ister_index)) -+ { -+ VKD3D_ASSERT(reg.allocated); -+ type = VKD3DSPR_OUTPUT; -+ register_index = reg.id; -+ } -+ else -+ writemask = (1u << deref->var->data_type->dimx) - 1; -+ -+ if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") -+ || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) -+ { -+ /* These are always 1-component, but for some reason are written -+ * with a writemask containing all components. 
*/ -+ writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ } -+ else -+ VKD3D_ASSERT(reg.allocated); -+ -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ dst_param->write_mask = writemask; -+ dst_param->reg.idx[0].offset = register_index; -+ -+ if (deref->rel_offset.node) -+ hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); -+} -+ -+static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, -+ struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, -+ unsigned int dst_writemask, const struct vkd3d_shader_location *loc) -+{ -+ enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; -+ struct vkd3d_shader_version version; -+ uint32_t register_index; -+ unsigned int writemask; -+ struct hlsl_reg reg; -+ -+ if (hlsl_type_is_resource(deref->var->data_type)) -+ { -+ unsigned int sampler_offset; -+ -+ type = VKD3DSPR_COMBINED_SAMPLER; -+ -+ sampler_offset = hlsl_offset_from_deref_safe(ctx, deref); -+ register_index = deref->var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; -+ writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (deref->var->is_uniform) -+ { -+ type = VKD3DSPR_CONST; -+ -+ reg = hlsl_reg_from_deref(ctx, deref); -+ register_index = reg.id; -+ writemask = reg.writemask; -+ VKD3D_ASSERT(reg.allocated); -+ } -+ else if (deref->var->is_input_semantic) -+ { -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, -+ deref->var->semantic.index, false, &type, ®ister_index)) -+ { -+ writemask = (1 << deref->var->data_type->dimx) - 1; -+ } -+ else -+ { -+ type = VKD3DSPR_INPUT; -+ -+ reg = hlsl_reg_from_deref(ctx, deref); -+ register_index = reg.id; -+ writemask = reg.writemask; -+ VKD3D_ASSERT(reg.allocated); -+ } -+ } -+ else -+ { -+ type = VKD3DSPR_TEMP; -+ -+ reg = hlsl_reg_from_deref(ctx, deref); -+ register_index = reg.id; -+ writemask = 
reg.writemask; -+ } -+ -+ vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = register_index; -+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); -+ -+ if (deref->rel_offset.node) -+ hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); -+} -+ -+static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_ir_load *load) -+{ -+ struct hlsl_ir_node *instr = &load->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = instr->reg.writemask; -+ -+ sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask, -+ &ins->location); -+} -+ -+static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_resource_load *load) -+{ -+ struct hlsl_ir_node *coords = load->coords.node; -+ struct hlsl_ir_node *ddx = load->ddx.node; -+ struct hlsl_ir_node *ddy = load->ddy.node; -+ struct hlsl_ir_node *instr = &load->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ unsigned int src_count = 2; -+ uint32_t flags = 0; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_SAMPLE: -+ opcode = VKD3DSIH_TEX; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_PROJ: -+ opcode = VKD3DSIH_TEX; -+ flags |= VKD3DSI_TEXLD_PROJECT; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ opcode = VKD3DSIH_TEX; -+ flags |= 
VKD3DSI_TEXLD_BIAS; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ opcode = VKD3DSIH_TEXLDD; -+ src_count += 2; -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); -+ return; -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) -+ return; -+ ins->flags = flags; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = instr->reg.writemask; -+ -+ src_param = &ins->src[0]; -+ vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ -+ sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, -+ VKD3DSP_WRITEMASK_ALL, &ins->location); -+ -+ if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) -+ { -+ src_param = &ins->src[2]; -+ vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL); -+ -+ src_param = &ins->src[3]; -+ vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL); -+ } -+} -+ -+static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) -+{ -+ struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ uint32_t swizzle; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst_param->write_mask = instr->reg.writemask; -+ -+ swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); -+ swizzle = 
hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); -+ swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); -+ swizzle = vsir_swizzle_from_hlsl(swizzle); -+ -+ src_param = &ins->src[0]; -+ VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); -+ src_param->reg.idx[0].offset = val->reg.id; -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param->swizzle = swizzle; -+} -+ -+static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_ir_store *store) -+{ -+ struct hlsl_ir_node *rhs = store->rhs.node; -+ struct hlsl_ir_node *instr = &store->node; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return; -+ -+ sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); -+ -+ src_param = &ins->src[0]; -+ vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask); -+} -+ -+static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_jump *jump) -+{ -+ struct hlsl_ir_node *condition = jump->condition.node; -+ struct hlsl_ir_node *instr = &jump->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = condition->reg.id; -+ dst_param->write_mask = condition->reg.writemask; -+ } -+ else -+ { -+ hlsl_fixme(ctx, &instr->loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ } -+} -+ 
-+static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); -+ -+static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) -+{ -+ struct hlsl_ir_node *condition = iff->condition.node; -+ struct vkd3d_shader_src_param *src_param; -+ struct hlsl_ir_node *instr = &iff->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (hlsl_version_lt(ctx, 2, 1)) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); -+ return; -+ } -+ VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) -+ return; -+ ins->flags = VKD3D_SHADER_REL_OP_NE; -+ -+ src_param = &ins->src[0]; -+ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); -+ src_param->modifiers = 0; -+ -+ src_param = &ins->src[1]; -+ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); -+ src_param->modifiers = VKD3DSPSM_NEG; -+ -+ sm1_generate_vsir_block(ctx, &iff->then_block, program); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) -+ return; -+ -+ sm1_generate_vsir_block(ctx, &iff->else_block, program); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) -+ return; -+} -+ -+static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+{ -+ struct hlsl_ir_node *instr, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->data_type) -+ { -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -+ break; -+ } -+ } -+ -+ switch 
(instr->type) -+ { -+ case HLSL_IR_CALL: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_CONSTANT: -+ sm1_generate_vsir_instr_constant(ctx, program, hlsl_ir_constant(instr)); -+ break; -+ -+ case HLSL_IR_EXPR: -+ sm1_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr)); -+ break; -+ -+ case HLSL_IR_IF: -+ sm1_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); -+ break; -+ -+ case HLSL_IR_JUMP: -+ sm1_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); -+ break; -+ -+ case HLSL_IR_LOAD: -+ sm1_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); -+ break; -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ sm1_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); -+ break; -+ -+ case HLSL_IR_STORE: -+ sm1_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -+ break; -+ } -+ } -+} -+ -+static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+{ -+ struct vkd3d_shader_version version = {0}; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ struct hlsl_block block; -+ -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ write_sm1_uniforms(ctx, &buffer); -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ ctab->code = buffer.data; -+ ctab->size = buffer.size; -+ -+ generate_vsir_signature(ctx, program, entry_func); -+ -+ hlsl_block_init(&block); -+ 
sm1_generate_vsir_constant_defs(ctx, program, &block); -+ sm1_generate_vsir_sampler_dcls(ctx, program, &block); -+ list_move_head(&entry_func->body.instrs, &block.instrs); -+ -+ sm1_generate_vsir_block(ctx, &entry_func->body, program); -+} -+ -+static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) -+{ -+ struct vkd3d_shader_location *loc; -+ struct hlsl_ir_node *vsir_instr; -+ -+ loc = &program->instructions.elements[program->instructions.count - 1].location; -+ -+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ hlsl_block_add_instr(block, vsir_instr); -+} -+ -+static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_node *instr) -+{ -+ struct vkd3d_shader_location *loc; -+ struct hlsl_ir_node *vsir_instr; -+ -+ loc = &program->instructions.elements[program->instructions.count - 1].location; -+ -+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, -+ program->instructions.count - 1, instr->data_type, &instr->reg, loc))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ list_add_before(&instr->entry, &vsir_instr->entry); -+ hlsl_replace_node(instr, vsir_instr); -+} -+ -+static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const bool output = var->is_output_semantic; -+ enum vkd3d_shader_sysval_semantic semantic; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_register_type type; -+ enum vkd3d_shader_opcode opcode; -+ unsigned int idx = 0; -+ uint32_t write_mask; -+ bool has_idx; -+ 
-+ sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, -+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); -+ if (semantic == ~0u) -+ semantic = VKD3D_SHADER_SV_NONE; -+ -+ if (var->is_input_semantic) -+ { -+ switch (semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: -+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; -+ break; -+ -+ case VKD3D_SHADER_SV_INSTANCE_ID: -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV; -+ break; -+ -+ default: -+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV; -+ break; -+ } -+ } -+ else -+ { -+ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) -+ opcode = VKD3DSIH_DCL_OUTPUT; -+ else -+ opcode = VKD3DSIH_DCL_OUTPUT_SIV; -+ } -+ -+ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx)) -+ { -+ if (has_idx) -+ idx = var->semantic.index; -+ write_mask = (1u << var->data_type->dimx) - 1; -+ } -+ else -+ { -+ if (output) -+ type = VKD3DSPR_OUTPUT; -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ type = VKD3DSPR_PATCHCONST; -+ else -+ type = VKD3DSPR_INPUT; -+ -+ has_idx = true; -+ idx = var->regs[HLSL_REGSET_NUMERIC].id; -+ write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0))) -+ return; -+ -+ if (opcode == VKD3DSIH_DCL_OUTPUT) -+ { -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE -+ || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT); -+ dst_param = &ins->declaration.dst; -+ } -+ else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) -+ { -+ 
VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); -+ dst_param = &ins->declaration.dst; -+ } -+ else -+ { -+ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); -+ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic, -+ var->semantic.index); -+ dst_param = &ins->declaration.register_semantic.reg; -+ } -+ -+ if (has_idx) -+ { -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = idx; -+ } -+ else -+ { -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); -+ } -+ -+ if (shader_sm4_is_scalar_register(&dst_param->reg)) -+ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; -+ else -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ -+ dst_param->write_mask = write_mask; -+ -+ if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -+ -+ add_last_vsir_instr_to_block(ctx, program, block); -+} -+ -+static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, -+ uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0))) -+ return; -+ -+ ins->declaration.count = temp_count; -+ -+ add_last_vsir_instr_to_block(ctx, program, block); -+} -+ -+static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_block *block, uint32_t idx, -+ uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0))) -+ return; -+ -+ ins->declaration.indexable_temp.register_idx = idx; -+ ins->declaration.indexable_temp.register_size = size; -+ 
ins->declaration.indexable_temp.alignment = 0; -+ ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; -+ ins->declaration.indexable_temp.component_count = comp_count; -+ ins->declaration.indexable_temp.has_function_scope = false; -+ -+ add_last_vsir_instr_to_block(ctx, program, block); -+} -+ -+static bool type_is_float(const struct hlsl_type *type) -+{ -+ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; -+} -+ -+static bool type_is_integer(const struct hlsl_type *type) -+{ -+ return type->e.numeric.type == HLSL_TYPE_BOOL -+ || type->e.numeric.type == HLSL_TYPE_INT -+ || type->e.numeric.type == HLSL_TYPE_UINT; -+} -+ -+static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct hlsl_ir_expr *expr, uint32_t bits) -+{ -+ struct hlsl_ir_node *operand = expr->operands[0].node; -+ const struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct hlsl_constant_value value = {0}; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask); -+ -+ value.u[0].u = bits; -+ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0); -+} -+ -+static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ const struct hlsl_ir_node *arg1 = expr->operands[0].node; -+ const struct hlsl_type *dst_type = expr->node.data_type; -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ static const union -+ { -+ uint32_t u; -+ float f; -+ } one = { .f = 1.0 }; -+ -+ /* Narrowing casts were already lowered. 
*/ -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -+ -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); -+ return false; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_INT: -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); -+ return false; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_UINT: -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ sm4_generate_vsir_cast_from_bool(ctx, 
program, expr, 1u); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); -+ return false; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); -+ return false; -+ -+ case HLSL_TYPE_BOOL: -+ /* Casts to bool should have already been lowered. */ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, -+ enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx) -+{ -+ struct vkd3d_shader_dst_param *dst_param, *null_param; -+ const struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i, src_count; -+ -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) -+ { -+ if (expr->operands[i].node) -+ src_count = i + 1; -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count))) -+ return; -+ -+ dst_param = &ins->dst[dst_idx]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ null_param = &ins->dst[1 - dst_idx]; -+ vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0); -+ null_param->reg.dimension = VSIR_DIMENSION_NONE; -+ -+ for (i = 0; i < src_count; ++i) -+ vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask); -+} -+ -+static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_expr *expr) -+{ -+ struct hlsl_ir_node *operand = expr->operands[0].node; -+ const struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct hlsl_constant_value value = {0}; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(type_is_float(expr->node.data_type)); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, 
&instr->loc, VKD3DSIH_DIV, 1, 2))) - return; - -- if (output) -- signature = &program->output_signature; -- else -- signature = &program->input_signature; -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ value.u[0].f = 1.0f; -+ value.u[1].f = 1.0f; -+ value.u[2].f = 1.0f; -+ value.u[3].f = 1.0f; -+ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, -+ VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); -+ -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); -+} -+ -+static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name) -+{ -+ const struct hlsl_type *dst_type = expr->node.data_type; -+ const struct hlsl_type *src_type = NULL; -+ -+ VKD3D_ASSERT(expr->node.reg.allocated); -+ if (expr->operands[0].node) -+ src_type = expr->operands[0].node->data_type; -+ -+ switch (expr->op) -+ { -+ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -+ sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr); -+ return true; -+ -+ case HLSL_OP1_ABS: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); -+ return true; -+ -+ case HLSL_OP1_BIT_NOT: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_CAST: -+ return sm4_generate_vsir_instr_expr_cast(ctx, program, expr); -+ -+ case HLSL_OP1_CEIL: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_COS: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1); -+ return true; -+ -+ case HLSL_OP1_DSX: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSX_COARSE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSX_FINE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSY: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSY_COARSE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSY_FINE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_EXP2: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_F16TOF32: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_F32TOF16: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_FLOOR: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_FRACT: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, 
VKD3DSIH_FRC, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_LOG2: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_LOGIC_NOT: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_NEG: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP1_RCP: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ /* SM5 comes with a RCP opcode */ -+ if (hlsl_version_ge(ctx, 5, 0)) -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true); -+ else -+ sm4_generate_vsir_rcp_using_div(ctx, program, expr); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP1_REINTERPRET: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_ROUND: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_RSQ: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_SAT: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 
VKD3DSPDM_SATURATE, true); -+ return true; -+ -+ case HLSL_OP1_SIN: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0); -+ return true; -+ -+ case HLSL_OP1_SQRT: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_TRUNC: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_ADD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_BIT_AND: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_BIT_OR: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_BIT_XOR: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_DIV: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", 
dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_DOT: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ switch (expr->operands[0].node->data_type->dimx) -+ { -+ case 4: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); -+ return true; -+ -+ case 3: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); -+ return true; -+ -+ case 2: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false); -+ return true; -+ -+ case 1: -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_EQUAL: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_GEQUAL: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -+ 
debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_LESS: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_LOGIC_AND: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_LOGIC_OR: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_LSHIFT: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true); -+ return true; -+ -+ case HLSL_OP3_MAD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MAX: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MIN: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MOD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_UINT: -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MUL: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ /* Using IMUL instead of UMUL because we're taking the low -+ * bits, and the native compiler generates IMUL. 
*/ -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_NEQUAL: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_RSHIFT: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, -+ dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP3_TERNARY: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -+ return false; -+ } -+} -+ -+static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_store *store) -+{ -+ struct hlsl_ir_node *instr = &store->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return false; -+ -+ dst_param = &ins->dst[0]; -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, -+ dst_param, &store->lhs, &instr->loc, store->writemask)) -+ return false; -+ -+ src_param = &ins->src[0]; -+ vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); -+ -+ return true; -+} -+ -+/* Does this variable's data come directly from the API user, rather than -+ * being temporary or from a previous shader stage? I.e. is it a uniform or -+ * VS input? 
*/ -+static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) -+{ -+ if (var->is_uniform) -+ return true; -+ -+ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; -+} -+ -+static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const struct hlsl_type *type = load->node.data_type; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct hlsl_ir_node *instr = &load->node; -+ struct vkd3d_shader_instruction *ins; -+ struct hlsl_constant_value value; -+ -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); -+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) -+ { -+ /* Uniform bools can be specified as anything, but internal bools -+ * always have 0 for false and ~0 for true. Normalise that here. */ -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) -+ return false; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) -+ return false; -+ -+ memset(&value, 0xff, sizeof(value)); -+ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, -+ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); -+ memset(&value, 0x00, sizeof(value)); -+ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, -+ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); -+ } -+ else -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return false; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) -+ return 
false; -+ } -+ return true; -+} -+ -+static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_resource_store *store) -+{ -+ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); -+ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; -+ struct hlsl_ir_node *instr = &store->node; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int writemask; - -- if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -- signature->element_count + 1, sizeof(*signature->elements))) -+ if (!store->resource.var->is_uniform) - { -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -- return; -+ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); -+ return false; - } -- element = &signature->elements[signature->element_count++]; - -- if (!hlsl_sm1_register_from_semantic(&program->shader_version, -- var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { -- unsigned int usage_idx; -- D3DDECLUSAGE usage; -- bool ret; -+ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); -+ return false; -+ } - -- register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) -+ return false; - -- ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -- VKD3D_ASSERT(ret); -- /* With the exception of vertex POSITION output, none of these are -- * system values. Pixel POSITION input is not equivalent to -- * SV_Position; the closer equivalent is VPOS, which is not declared -- * as a semantic. 
*/ -- if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -- && output && usage == D3DDECLUSAGE_POSITION) -- sysval = VKD3D_SHADER_SV_POSITION; -+ writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, -+ &ins->dst[0], &store->resource, &instr->loc, writemask)) -+ return false; -+ } -+ else -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) -+ return false; -+ -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, -+ &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) -+ return false; - } -- mask = (1 << var->data_type->dimx) - 1; - -- memset(element, 0, sizeof(*element)); -- if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); -+ -+ return true; -+} -+ -+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+{ -+ struct vkd3d_string_buffer *dst_type_string; -+ struct hlsl_ir_node *instr, *next; -+ struct hlsl_ir_switch_case *c; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) - { -- --signature->element_count; -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -- return; -+ if (instr->data_type) -+ { -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -+ break; -+ } -+ } -+ -+ switch (instr->type) -+ { -+ case HLSL_IR_CALL: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_CONSTANT: -+ /* In SM4 all constants are inlined. 
*/ -+ break; -+ -+ case HLSL_IR_EXPR: -+ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) -+ break; -+ -+ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ -+ hlsl_release_string_buffer(ctx, dst_type_string); -+ break; -+ -+ case HLSL_IR_IF: -+ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); -+ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); -+ break; -+ -+ case HLSL_IR_LOAD: -+ if (sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr))) -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ case HLSL_IR_LOOP: -+ sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); -+ break; -+ -+ case HLSL_IR_RESOURCE_STORE: -+ if (sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr))) -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ case HLSL_IR_STORE: -+ if (sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr))) -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ case HLSL_IR_SWITCH: -+ LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) -+ sm4_generate_vsir_block(ctx, &c->body, program); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ default: -+ break; -+ } - } -- element->semantic_index = var->semantic.index; -- element->sysval_semantic = sysval; -- element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -- element->register_index = register_index; -- element->target_location = register_index; -- element->register_count = 1; -- element->mask = mask; -- element->used_mask = mask; -- if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -- element->interpolation_mode = VKD3DSIM_LINEAR; 
- } - --static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) -+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) - { -+ bool is_patch_constant_func = func == ctx->patch_constant_func; -+ struct hlsl_block block = {0}; -+ struct hlsl_scope *scope; - struct hlsl_ir_var *var; -+ uint32_t temp_count; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ compute_liveness(ctx, func); -+ mark_indexable_vars(ctx, func); -+ temp_count = allocate_temp_registers(ctx, func); -+ if (ctx->result) -+ return; -+ program->temp_count = max(program->temp_count, temp_count); -+ -+ hlsl_block_init(&block); -+ -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_input_semantic) -- sm1_generate_vsir_signature_entry(ctx, program, false, var); -- if (var->is_output_semantic) -- sm1_generate_vsir_signature_entry(ctx, program, true, var); -+ if ((var->is_input_semantic && var->last_read) -+ || (var->is_output_semantic && var->first_write)) -+ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); -+ } -+ -+ if (temp_count) -+ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) -+ continue; -+ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -+ continue; -+ -+ if (var->indexable) -+ { -+ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -+ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -+ -+ sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); -+ } -+ } - } -+ -+ list_move_head(&func->body.instrs, 
&block.instrs); -+ -+ hlsl_block_cleanup(&block); -+ -+ sm4_generate_vsir_block(ctx, &func->body, program); - } - - /* OBJECTIVE: Translate all the information from ctx and entry_func to the -- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -- * without relying on ctx and entry_func. */ --static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -- uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+ * vsir_program, so it can be used as input to tpf_compile() without relying -+ * on ctx and entry_func. */ -+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ uint64_t config_flags, struct vsir_program *program) - { - struct vkd3d_shader_version version = {0}; -- struct vkd3d_bytecode_buffer buffer = {0}; - - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (!vsir_program_init(program, NULL, &version, 0)) -+ -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - -- write_sm1_uniforms(ctx, &buffer); -- if (buffer.status) -+ generate_vsir_signature(ctx, program, func); -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ generate_vsir_signature(ctx, program, ctx->patch_constant_func); -+ -+ if (version.type == VKD3D_SHADER_TYPE_COMPUTE) - { -- vkd3d_free(buffer.data); -- ctx->result = buffer.status; -- return; -+ program->thread_group_size.x = ctx->thread_count[0]; -+ program->thread_group_size.y = ctx->thread_count[1]; -+ program->thread_group_size.z = ctx->thread_count[2]; - } -- ctab->code = buffer.data; -- ctab->size = buffer.size; - -- sm1_generate_vsir_signature(ctx, program); -+ sm4_generate_vsir_add_function(ctx, func, config_flags, program); -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ sm4_generate_vsir_add_function(ctx, 
ctx->patch_constant_func, config_flags, program); - } - - static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -@@ -6337,16 +9331,95 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru - return true; - } - --int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -- enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) -+static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *call, *rhs, *store; -+ struct hlsl_ir_function_decl *func; -+ unsigned int component_count; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_ir_var *lhs; -+ char *body; -+ -+ static const char template[] = -+ "typedef uint%u uintX;\n" -+ "uintX soft_f32tof16(float%u x)\n" -+ "{\n" -+ " uintX v = asuint(x);\n" -+ " uintX v_abs = v & 0x7fffffff;\n" -+ " uintX sign_bit = (v >> 16) & 0x8000;\n" -+ " uintX exp = (v >> 23) & 0xff;\n" -+ " uintX mantissa = v & 0x7fffff;\n" -+ " uintX nan16;\n" -+ " uintX nan = (v & 0x7f800000) == 0x7f800000;\n" -+ " uintX val;\n" -+ "\n" -+ " val = 113 - exp;\n" -+ " val = (mantissa + 0x800000) >> val;\n" -+ " val >>= 13;\n" -+ "\n" -+ " val = (exp - 127) < -38 ? 0 : val;\n" -+ "\n" -+ " val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n" -+ " val = v_abs > 0x47ffe000 ? 0x7bff : val;\n" -+ "\n" -+ " nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n" -+ " val = nan ? 
nan16 : val;\n" -+ "\n" -+ " return (val & 0x7fff) + sign_bit;\n" -+ "}\n"; -+ -+ if (node->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(node); -+ -+ if (expr->op != HLSL_OP1_F32TOF16) -+ return false; -+ -+ rhs = expr->operands[0].node; -+ component_count = hlsl_type_component_count(rhs->data_type); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) -+ return false; -+ -+ if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body))) -+ return false; -+ -+ lhs = func->parameters.vars[0]; -+ -+ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ if (!(call = hlsl_new_call(ctx, func, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, call); -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, &load->node); -+ -+ return true; -+} -+ -+static void process_entry_function(struct hlsl_ctx *ctx, -+ const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) - { - const struct hlsl_profile_info *profile = ctx->profile; -+ struct hlsl_block static_initializers, global_uniforms; - struct hlsl_block *const body = &entry_func->body; - struct recursive_call_ctx recursive_call_ctx; - struct hlsl_ir_var *var; - unsigned int i; - -- list_move_head(&body->instrs, &ctx->static_initializers.instrs); -+ if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) -+ return; -+ list_move_head(&body->instrs, &static_initializers.instrs); -+ -+ if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block)) -+ return; -+ list_move_head(&body->instrs, &global_uniforms.instrs); - - memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); -@@ -6355,10 +9428,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - /* Avoid 
going into an infinite loop when processing call instructions. - * lower_return() recurses into inferior calls. */ - if (ctx->result) -- return ctx->result; -+ return; - - if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) -+ { - lower_ir(ctx, lower_f16tof32, body); -+ lower_ir(ctx, lower_f32tof16, body); -+ } - - lower_return(ctx, entry_func, body, false); - -@@ -6367,20 +9443,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_matrix_swizzles, body); - lower_ir(ctx, lower_index_loads, body); - -- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -- { -- if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, body, var); -- } -- - for (i = 0; i < entry_func->parameters.count; ++i) - { - var = entry_func->parameters.vars[i]; - -- if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (hlsl_type_is_resource(var->data_type)) - { - prepend_uniform_copy(ctx, body, var); - } -+ else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ { -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Patch constant function parameter \"%s\" cannot be uniform.", var->name); -+ else -+ prepend_uniform_copy(ctx, body, var); -+ } - else - { - if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT -@@ -6392,9 +9470,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - - if (var->storage_modifiers & HLSL_STORAGE_IN) -- prepend_input_var_copy(ctx, body, var); -+ prepend_input_var_copy(ctx, entry_func, var); - if (var->storage_modifiers & HLSL_STORAGE_OUT) -- append_output_var_copy(ctx, body, var); -+ append_output_var_copy(ctx, entry_func, var); - } - } - if (entry_func->return_var) -@@ -6403,28 +9481,17 @@ int hlsl_emit_bytecode(struct 
hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - -- append_output_var_copy(ctx, body, entry_func->return_var); -- } -- -- for (i = 0; i < entry_func->attr_count; ++i) -- { -- const struct hlsl_attribute *attr = entry_func->attrs[i]; -- -- if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE) -- parse_numthreads_attribute(ctx, attr); -- else -- hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, -- "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); -+ append_output_var_copy(ctx, entry_func, entry_func->return_var); - } - -- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) -- hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -- "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); -- - if (profile->major_version >= 4) - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } -+ else -+ { -+ hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); -+ } - - transform_unroll_loops(ctx, body); - hlsl_run_const_passes(ctx, body); -@@ -6496,29 +9563,72 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - while (hlsl_transform_ir(ctx, dce, body, NULL)); - - compute_liveness(ctx, entry_func); -+ mark_vars_usage(ctx); - -- if (TRACE_ON()) -- rb_for_each_entry(&ctx->functions, dump_function, ctx); -+ calculate_resource_register_counts(ctx); - -- transform_derefs(ctx, mark_indexable_vars, body); -+ allocate_register_reservations(ctx, &ctx->extern_vars); -+ allocate_register_reservations(ctx, &entry_func->extern_vars); -+ allocate_semantic_registers(ctx, entry_func); -+} - -- calculate_resource_register_counts(ctx); -+int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ enum 
vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) -+{ -+ const struct hlsl_profile_info *profile = ctx->profile; -+ struct hlsl_block global_uniform_block; -+ struct hlsl_ir_var *var; -+ -+ parse_entry_function_attributes(ctx, entry_func); -+ if (ctx->result) -+ return ctx->result; -+ -+ if (profile->type == VKD3D_SHADER_TYPE_HULL) -+ validate_hull_shader_attributes(ctx, entry_func); -+ else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); -+ else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); -+ -+ hlsl_block_init(&global_uniform_block); -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ prepend_uniform_copy(ctx, &global_uniform_block, var); -+ } -+ -+ process_entry_function(ctx, &global_uniform_block, entry_func); -+ if (ctx->result) -+ return ctx->result; - -- allocate_register_reservations(ctx); -+ if (profile->type == VKD3D_SHADER_TYPE_HULL) -+ { -+ process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); -+ if (ctx->result) -+ return ctx->result; -+ } -+ -+ hlsl_block_cleanup(&global_uniform_block); - -- allocate_temp_registers(ctx, entry_func); - if (profile->major_version < 4) - { -+ mark_indexable_vars(ctx, entry_func); -+ allocate_temp_registers(ctx, entry_func); - allocate_const_registers(ctx, entry_func); - } - else - { - allocate_buffers(ctx); -- allocate_objects(ctx, HLSL_REGSET_TEXTURES); -- allocate_objects(ctx, HLSL_REGSET_UAVS); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); -+ allocate_objects(ctx, 
entry_func, HLSL_REGSET_UAVS); - } -- allocate_semantic_registers(ctx); -- allocate_objects(ctx, HLSL_REGSET_SAMPLERS); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); -+ -+ if (TRACE_ON()) -+ rb_for_each_entry(&ctx->functions, dump_function, ctx); - - if (ctx->result) - return ctx->result; -@@ -6540,14 +9650,29 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - return ctx->result; - } - -- result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); -+ result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context); - vsir_program_cleanup(&program); - vkd3d_shader_free_shader_code(&ctab); - return result; - } - - case VKD3D_SHADER_TARGET_DXBC_TPF: -- return hlsl_sm4_write(ctx, entry_func, out); -+ { -+ uint32_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vsir_program program; -+ int result; -+ -+ sm4_generate_vsir(ctx, entry_func, config_flags, &program); -+ if (ctx->result) -+ { -+ vsir_program_cleanup(&program); -+ return ctx->result; -+ } -+ -+ result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); -+ vsir_program_cleanup(&program); -+ return result; -+ } - - default: - ERR("Unsupported shader target type %#x.\n", target_type); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index db4913b7c62..716adb15f08 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -1452,11 +1452,15 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) - - case HLSL_TYPE_UINT: - case HLSL_TYPE_INT: -- case HLSL_TYPE_BOOL: - if (const_arg->value.u[k].u != 1) - return false; - break; - -+ case HLSL_TYPE_BOOL: -+ if (const_arg->value.u[k].u != ~0) -+ return false; -+ break; -+ - default: - return false; - } -@@ -1514,6 +1518,20 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx 
*ctx, struct hlsl_ir_node *in - res_node = mut_arg; - break; - -+ case HLSL_OP2_LOGIC_AND: -+ if (constant_is_zero(const_arg)) -+ res_node = &const_arg->node; -+ else if (constant_is_one(const_arg)) -+ res_node = mut_arg; -+ break; -+ -+ case HLSL_OP2_LOGIC_OR: -+ if (constant_is_zero(const_arg)) -+ res_node = mut_arg; -+ else if (constant_is_one(const_arg)) -+ res_node = &const_arg->node; -+ break; -+ - default: - break; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 747238e2fee..56c98d30661 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -19,6 +19,15 @@ - #include "vkd3d_shader_private.h" - #include "vkd3d_types.h" - -+struct vsir_transformation_context -+{ -+ enum vkd3d_result result; -+ struct vsir_program *program; -+ uint64_t config_flags; -+ const struct vkd3d_shader_compile_info *compile_info; -+ struct vkd3d_shader_message_context *message_context; -+}; -+ - static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, - unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) - { -@@ -65,7 +74,8 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil - } - - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_version *version, unsigned int reserve) -+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -+ enum vsir_normalisation_level normalisation_level) - { - memset(program, 0, sizeof(*program)); - -@@ -87,6 +97,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c - } - - program->shader_version = *version; -+ program->cf_type = cf_type; -+ program->normalisation_level = normalisation_level; - return shader_instruction_array_init(&program->instructions, reserve); - } - -@@ -117,26 +129,204 @@ const struct 
vkd3d_shader_parameter1 *vsir_program_get_parameter( - return NULL; - } - -+static struct signature_element *vsir_signature_find_element_by_name( -+ const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index) -+{ -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name) -+ && signature->elements[i].semantic_index == semantic_index) -+ return &signature->elements[i]; -+ } -+ -+ return NULL; -+} -+ -+bool vsir_signature_find_sysval(const struct shader_signature *signature, -+ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index) -+{ -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ if (e->sysval_semantic == sysval && e->semantic_index == semantic_index) -+ { -+ *element_index = i; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, -+ enum vkd3d_data_type data_type, unsigned int idx_count) -+{ -+ reg->type = reg_type; -+ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -+ reg->non_uniform = false; -+ reg->data_type = data_type; -+ reg->idx[0].offset = ~0u; -+ reg->idx[0].rel_addr = NULL; -+ reg->idx[0].is_in_bounds = false; -+ reg->idx[1].offset = ~0u; -+ reg->idx[1].rel_addr = NULL; -+ reg->idx[1].is_in_bounds = false; -+ reg->idx[2].offset = ~0u; -+ reg->idx[2].rel_addr = NULL; -+ reg->idx[2].is_in_bounds = false; -+ reg->idx_count = idx_count; -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ reg->alignment = 0; -+} -+ - static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) - { - return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; - } - --static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction 
*instruction) -+void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type, -+ enum vkd3d_data_type data_type, unsigned int idx_count) - { -- enum vkd3d_shader_opcode opcode = instruction->opcode; -- return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) -- || opcode == VKD3DSIH_HS_DECLS; -+ vsir_register_init(¶m->reg, reg_type, data_type, idx_count); -+ param->swizzle = 0; -+ param->modifiers = VKD3DSPSM_NONE; - } - --static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) -+static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) - { -- struct vkd3d_shader_location location = ins->location; -+ vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); -+ src->reg.u.immconst_u32[0] = value; -+} - -- vsir_instruction_init(ins, &location, VKD3DSIH_NOP); -+void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) -+{ -+ vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); -+ param->reg.dimension = VSIR_DIMENSION_NONE; -+ param->reg.idx[0].offset = label_id; -+} -+ -+static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -+{ -+ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); -+ src->reg.idx[0].offset = idx; -+} -+ -+static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2); -+ src->reg.idx[0].offset = id; -+ src->reg.idx[1].offset = idx; -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+} -+ -+static void vsir_src_param_init_sampler(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 2); -+ src->reg.idx[0].offset = id; -+ src->reg.idx[1].offset = idx; -+ src->reg.dimension = 
VSIR_DIMENSION_NONE; -+} -+ -+static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); -+ src->reg.idx[0].offset = idx; -+} -+ -+static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); -+ src->reg.idx[0].offset = idx; -+} -+ -+static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -+ src->reg.idx[0].offset = idx; -+} -+ -+static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src->reg.idx[0].offset = idx; -+} -+ -+static void src_param_init_temp_float4(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ src->reg.idx[0].offset = idx; -+} -+ -+static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ src->reg.idx[0].offset = idx; -+} -+ -+void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type, -+ enum vkd3d_data_type data_type, unsigned int idx_count) -+{ -+ vsir_register_init(¶m->reg, reg_type, data_type, idx_count); -+ param->write_mask = VKD3DSP_WRITEMASK_0; -+ param->modifiers = VKD3DSPDM_NONE; -+ param->shift = 0; -+} -+ -+static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ -+static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, 
VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ -+static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ -+static void dst_param_init_temp_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst->reg.idx[0].offset = idx; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+} -+ -+static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ -+static void dst_param_init_output(struct vkd3d_shader_dst_param *dst, -+ enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1); -+ dst->reg.idx[0].offset = idx; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst->write_mask = write_mask; -+} -+ -+void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -+ enum vkd3d_shader_opcode opcode) -+{ -+ memset(ins, 0, sizeof(*ins)); -+ ins->location = *location; -+ ins->opcode = opcode; - } - --static bool vsir_instruction_init_with_params(struct vsir_program *program, -+bool vsir_instruction_init_with_params(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) - { -@@ -161,6 +351,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, - return true; - } - -+static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, -+ const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ -+ if (!(src_param = 
vsir_program_get_src_params(program, 1))) -+ return false; -+ -+ vsir_src_param_init_label(src_param, label_id); -+ -+ vsir_instruction_init(ins, location, VKD3DSIH_LABEL); -+ ins->src = src_param; -+ ins->src_count = 1; -+ -+ return true; -+} -+ -+static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) -+{ -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) -+ || opcode == VKD3DSIH_HS_DECLS; -+} -+ -+static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) -+{ -+ struct vkd3d_shader_location location = ins->location; -+ -+ vsir_instruction_init(ins, &location, VKD3DSIH_NOP); -+} -+ - static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, - enum vkd3d_shader_opcode *opcode, bool *requires_swap) - { -@@ -441,10 +662,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex) -+{ -+ unsigned int idx = tex->src[1].reg.idx[0].offset; -+ struct vkd3d_shader_src_param *srcs; -+ -+ VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); -+ VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr); -+ -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ srcs[0] = tex->src[0]; -+ vsir_src_param_init_resource(&srcs[1], idx, idx); -+ vsir_src_param_init_sampler(&srcs[2], idx, idx); -+ -+ tex->opcode = VKD3DSIH_SAMPLE; -+ tex->src = srcs; -+ tex->src_count = 3; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, -+ struct vkd3d_shader_instruction *texldd) -+{ -+ unsigned int idx = texldd->src[1].reg.idx[0].offset; -+ struct vkd3d_shader_src_param *srcs; -+ -+ VKD3D_ASSERT(texldd->src[1].reg.idx_count == 1); -+ 
VKD3D_ASSERT(!texldd->src[1].reg.idx[0].rel_addr); -+ -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ srcs[0] = texldd->src[0]; -+ vsir_src_param_init_resource(&srcs[1], idx, idx); -+ vsir_src_param_init_sampler(&srcs[2], idx, idx); -+ srcs[3] = texldd->src[2]; -+ srcs[4] = texldd->src[3]; -+ -+ texldd->opcode = VKD3DSIH_SAMPLE_GRAD; -+ texldd->src = srcs; -+ texldd->src_count = 5; -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vsir_transformation_context *ctx) - { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; - unsigned int tmp_idx = ~0u, i; - enum vkd3d_result ret; - -@@ -471,8 +740,12 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_CONSTANT_BUFFER: -+ case VKD3DSIH_DCL_GLOBAL_FLAGS: - case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: -+ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -+ case VKD3DSIH_DCL_THREAD_GROUP: -+ case VKD3DSIH_DCL_UAV_TYPED: - vkd3d_shader_instruction_make_nop(ins); - break; - -@@ -481,6 +754,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - return ret; - break; - -+ case VKD3DSIH_TEX: -+ if ((ret = vsir_program_lower_tex(program, ins)) < 0) -+ return ret; -+ break; -+ -+ case VKD3DSIH_TEXLDD: -+ if ((ret = vsir_program_lower_texldd(program, ins)) < 0) -+ return ret; -+ break; -+ -+ case VKD3DSIH_TEXBEM: -+ case VKD3DSIH_TEXBEML: -+ case VKD3DSIH_TEXCOORD: -+ case VKD3DSIH_TEXDEPTH: -+ case VKD3DSIH_TEXDP3: -+ case VKD3DSIH_TEXDP3TEX: -+ case VKD3DSIH_TEXLDL: -+ case VKD3DSIH_TEXM3x2PAD: -+ case VKD3DSIH_TEXM3x2TEX: -+ case VKD3DSIH_TEXM3x3DIFF: -+ case VKD3DSIH_TEXM3x3PAD: -+ case 
VKD3DSIH_TEXM3x3SPEC: -+ case VKD3DSIH_TEXM3x3TEX: -+ case VKD3DSIH_TEXM3x3VSPEC: -+ case VKD3DSIH_TEXREG2AR: -+ case VKD3DSIH_TEXREG2GB: -+ case VKD3DSIH_TEXREG2RGB: -+ vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to unimplemented feature: Combined sampler instruction %#x.", -+ ins->opcode); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ - default: - break; - } -@@ -523,29 +828,197 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i - } - - for (i = 0; i < ins->dst_count; ++i) -- shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); -+ shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); - } - --static const struct vkd3d_shader_varying_map *find_varying_map( -- const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) -+/* Ensure that the program closes with a ret. sm1 programs do not, by default. -+ * Many of our IR passes rely on this in order to insert instructions at the -+ * end of execution. 
*/ -+static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { -- unsigned int i; -- -- for (i = 0; i < varying_map->varying_count; ++i) -- { -- if (varying_map->varying_map[i].output_signature_index == signature_idx) -- return &varying_map->varying_map[i]; -- } -+ static const struct vkd3d_shader_location no_loc; -+ if (program->instructions.count -+ && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET) -+ return VKD3D_OK; - -- return NULL; -+ if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET); -+ return VKD3D_OK; - } - --static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct shader_signature *signature = &program->output_signature; -+ struct signature_element *new_elements, *e; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) -+ return VKD3D_OK; -+ -+ if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0))) -+ { -+ program->diffuse_written_mask = e->mask; -+ e->mask = VKD3DSP_WRITEMASK_ALL; -+ -+ return VKD3D_OK; -+ } -+ -+ if (!(new_elements = vkd3d_realloc(signature->elements, -+ (signature->element_count + 1) * sizeof(*signature->elements)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ signature->elements = new_elements; -+ e = &signature->elements[signature->element_count++]; -+ memset(e, 0, sizeof(*e)); -+ e->semantic_name = vkd3d_strdup("COLOR"); -+ e->sysval_semantic = VKD3D_SHADER_SV_NONE; -+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; 
-+ e->register_count = 1; -+ e->mask = VKD3DSP_WRITEMASK_ALL; -+ e->used_mask = VKD3DSP_WRITEMASK_ALL; -+ e->register_index = SM1_COLOR_REGISTER_OFFSET; -+ e->target_location = SM1_COLOR_REGISTER_OFFSET; -+ e->interpolation_mode = VKD3DSIM_NONE; -+ -+ return VKD3D_OK; -+} -+ -+/* Uninitialized components of diffuse yield 1.0 in SM1-2. Implement this by -+ * always writing diffuse in those versions, even if the PS doesn't read it. */ -+static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ static const struct vkd3d_shader_location no_loc; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -+ || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL) -+ return VKD3D_OK; -+ -+ /* Write the instruction after all LABEL, DCL, and NOP instructions. -+ * We need to skip NOP instructions because they might result from removed -+ * DCLs, and there could still be DCLs after NOPs. 
*/ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) -+ break; -+ } -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &program->instructions.elements[i]; -+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = 0; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask; -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ for (i = 0; i < 4; ++i) -+ ins->src[0].reg.u.immconst_f32[i] = 1.0f; -+ return VKD3D_OK; -+} -+ -+static const struct vkd3d_shader_varying_map *find_varying_map( -+ const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < varying_map->varying_count; ++i) -+ { -+ if (varying_map->varying_map[i].output_signature_index == signature_idx) -+ return &varying_map->varying_map[i]; -+ } -+ -+ return NULL; -+} -+ -+static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info) -+{ -+ const struct vkd3d_shader_spirv_target_info *spirv_info; -+ enum vkd3d_shader_spirv_environment environment; -+ -+ switch (info->target_type) -+ { -+ case VKD3D_SHADER_TARGET_SPIRV_BINARY: -+ spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO); -+ environment = spirv_info ? 
spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ -+ switch (environment) -+ { -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: -+ return true; -+ -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: -+ /* FIXME: Allow KHR_maintenance4. */ -+ return false; -+ -+ default: -+ FIXME("Unrecognized environment %#x.\n", environment); -+ return false; -+ } -+ -+ default: -+ return true; -+ } -+} -+ -+static void remove_unread_output_components(const struct shader_signature *signature, -+ struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst) -+{ -+ const struct signature_element *e; -+ -+ switch (dst->reg.type) -+ { -+ case VKD3DSPR_OUTPUT: -+ e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); -+ break; -+ -+ case VKD3DSPR_ATTROUT: -+ e = vsir_signature_find_element_for_reg(signature, -+ SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); -+ break; -+ -+ case VKD3DSPR_RASTOUT: -+ e = vsir_signature_find_element_for_reg(signature, -+ SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); -+ break; -+ -+ default: -+ return; -+ } -+ -+ /* We already changed the mask earlier. 
*/ -+ dst->write_mask &= e->mask; -+ -+ if (!dst->write_mask) -+ { -+ if (ins->dst_count == 1) -+ vkd3d_shader_instruction_make_nop(ins); -+ else -+ vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); -+ } -+} -+ -+static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { -- const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; -+ const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; -+ const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; -+ bool allows_subset_masks = target_allows_subset_masks(compile_info); - struct shader_signature *signature = &program->output_signature; -+ unsigned int orig_element_count = signature->element_count; - const struct vkd3d_shader_varying_map_info *varying_map; -+ struct signature_element *new_elements, *e; -+ unsigned int uninit_varying_count = 0; -+ unsigned int subset_varying_count = 0; -+ unsigned int new_register_count = 0; - unsigned int i; - - if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) -@@ -554,22 +1027,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program - for (i = 0; i < signature->element_count; ++i) - { - const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); -- struct signature_element *e = &signature->elements[i]; - -+ e = &signature->elements[i]; - if (map) - { - unsigned int input_mask = map->input_mask; - - e->target_location = map->input_register_index; - -- /* It is illegal in Vulkan if the next shader uses the same varying -- * location with a different mask. 
*/ -- if (input_mask && input_mask != e->mask) -+ if ((input_mask & e->mask) == input_mask) -+ { -+ ++subset_varying_count; -+ if (!allows_subset_masks) -+ { -+ e->mask = input_mask; -+ e->used_mask &= input_mask; -+ } -+ } -+ else if (input_mask && input_mask != e->mask) - { - vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " -- "Output mask %#x does not match input mask %#x.", -- e->mask, input_mask); -+ "Input mask %#x reads components not written in output mask %#x.", -+ input_mask, e->mask); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - } -@@ -577,17 +1057,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program - { - e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; - } -+ -+ new_register_count = max(new_register_count, e->register_index + 1); - } - -+ /* Handle uninitialized varyings by writing them before every ret. -+ * -+ * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized -+ * varyings contain. -+ * -+ * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2. -+ * In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1). -+ * -+ * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1). -+ * WARP writes (1, 1, 1, 1). -+ * -+ * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1). -+ * -+ * We don't have enough knowledge to identify diffuse here. Instead we deal -+ * with that in vsir_program_ensure_diffuse(), by always writing diffuse if -+ * the shader doesn't. 
-+ */ -+ - for (i = 0; i < varying_map->varying_count; ++i) - { - if (varying_map->varying_map[i].output_signature_index >= signature->element_count) -+ ++uninit_varying_count; -+ } -+ -+ if (!(new_elements = vkd3d_realloc(signature->elements, -+ (signature->element_count + uninit_varying_count) * sizeof(*signature->elements)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ signature->elements = new_elements; -+ -+ for (i = 0; i < varying_map->varying_count; ++i) -+ { -+ const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i]; -+ -+ if (map->output_signature_index < orig_element_count) -+ continue; -+ -+ TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n", -+ map->input_register_index, map->input_mask); -+ e = &signature->elements[signature->element_count++]; -+ memset(e, 0, sizeof(*e)); -+ e->sysval_semantic = VKD3D_SHADER_SV_NONE; -+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ e->register_count = 1; -+ e->mask = map->input_mask; -+ e->used_mask = map->input_mask; -+ e->register_index = new_register_count++; -+ e->target_location = map->input_register_index; -+ e->interpolation_mode = VKD3DSIM_LINEAR; -+ } -+ -+ /* Write each uninitialized varying before each ret. 
*/ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ struct vkd3d_shader_location loc; -+ -+ if (ins->opcode != VKD3DSIH_RET) -+ continue; -+ -+ loc = ins->location; -+ if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[i]; -+ -+ for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) - { -- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "Aborting due to not yet implemented feature: " -- "The next stage consumes varyings not written by this stage."); -- return VKD3D_ERROR_NOT_IMPLEMENTED; -+ e = &signature->elements[j]; -+ -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask); -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ++ins; - } -+ -+ i += uninit_varying_count; -+ } -+ -+ /* Vulkan (without KHR_maintenance4) disallows any mismatching masks, -+ * including when the input mask is a proper subset of the output mask. -+ * Resolve this by rewriting the shader to remove unread components from -+ * any writes to the output variable. 
*/ -+ -+ if (!subset_varying_count || allows_subset_masks) -+ return VKD3D_OK; -+ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ for (unsigned int j = 0; j < ins->dst_count; ++j) -+ remove_unread_output_components(signature, ins, &ins->dst[j]); - } - - return VKD3D_OK; -@@ -727,192 +1293,68 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali - return VKD3D_OK; - } - --void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, -- enum vkd3d_data_type data_type, unsigned int idx_count) -+static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { -- reg->type = reg_type; -- reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -- reg->non_uniform = false; -- reg->data_type = data_type; -- reg->idx[0].offset = ~0u; -- reg->idx[0].rel_addr = NULL; -- reg->idx[0].is_in_bounds = false; -- reg->idx[1].offset = ~0u; -- reg->idx[1].rel_addr = NULL; -- reg->idx[1].is_in_bounds = false; -- reg->idx[2].offset = ~0u; -- reg->idx[2].rel_addr = NULL; -- reg->idx[2].is_in_bounds = false; -- reg->idx_count = idx_count; -- reg->dimension = VSIR_DIMENSION_SCALAR; -- reg->alignment = 0; --} -+ struct hull_flattener flattener = {program->instructions}; -+ struct vkd3d_shader_instruction_array *instructions; -+ struct shader_phase_location_array locations; -+ enum vkd3d_result result = VKD3D_OK; -+ unsigned int i; - --void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type, -- enum vkd3d_data_type data_type, unsigned int idx_count) --{ -- vsir_register_init(¶m->reg, reg_type, data_type, idx_count); -- param->swizzle = 0; -- param->modifiers = VKD3DSPSM_NONE; --} -+ instructions = &flattener.instructions; - --void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum 
vkd3d_shader_register_type reg_type, -- enum vkd3d_data_type data_type, unsigned int idx_count) --{ -- vsir_register_init(¶m->reg, reg_type, data_type, idx_count); -- param->write_mask = VKD3DSP_WRITEMASK_0; -- param->modifiers = VKD3DSPDM_NONE; -- param->shift = 0; --} -+ flattener.phase = VKD3DSIH_INVALID; -+ for (i = 0, locations.count = 0; i < instructions->count; ++i) -+ flattener_eliminate_phase_related_dcls(&flattener, i, &locations); - --void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) --{ -- vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); -- param->reg.dimension = VSIR_DIMENSION_NONE; -- param->reg.idx[0].offset = label_id; --} -+ if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) -+ return result; - --static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) --{ -- vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); -- src->reg.idx[0].offset = idx; --} -+ if (flattener.phase != VKD3DSIH_INVALID) -+ { -+ if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); -+ } - --static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) --{ -- vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -- src->reg.idx[0].offset = idx; -+ program->instructions = flattener.instructions; -+ return result; - } - --static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+struct control_point_normaliser - { -- vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); -- dst->reg.idx[0].offset = idx; --} -+ struct vkd3d_shader_instruction_array instructions; -+ enum vkd3d_shader_opcode phase; -+ struct vkd3d_shader_src_param *outpointid_param; -+}; - --static void dst_param_init_temp_bool(struct 
vkd3d_shader_dst_param *dst, unsigned int idx) -+static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) - { -- vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -- dst->reg.idx[0].offset = idx; -+ return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; - } - --static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program) - { -- vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- dst->reg.idx[0].offset = idx; -- dst->write_mask = VKD3DSP_WRITEMASK_0; --} -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_src_param *rel_addr; - --static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) --{ -- vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src->reg.idx[0].offset = idx; --} -+ if (instructions->outpointid_param) -+ return instructions->outpointid_param; - --static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) --{ -- vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- src->reg.idx[0].offset = idx; --} -+ if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) -+ return NULL; - --static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) --{ -- vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); -- src->reg.u.immconst_u32[0] = value; --} -+ vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); -+ rel_addr->swizzle = 0; -+ rel_addr->modifiers = 0; - --static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) --{ -- vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); -- src->reg.idx[0].offset = idx; -+ instructions->outpointid_param = rel_addr; -+ return 
rel_addr; - } - --void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode opcode) --{ -- memset(ins, 0, sizeof(*ins)); -- ins->location = *location; -- ins->opcode = opcode; --} -- --static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, -- const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program) --{ -- struct vkd3d_shader_src_param *src_param; -- -- if (!(src_param = vsir_program_get_src_params(program, 1))) -- return false; -- -- vsir_src_param_init_label(src_param, label_id); -- -- vsir_instruction_init(ins, location, VKD3DSIH_LABEL); -- ins->src = src_param; -- ins->src_count = 1; -- -- return true; --} -- --static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) --{ -- struct hull_flattener flattener = {*src_instructions}; -- struct vkd3d_shader_instruction_array *instructions; -- struct shader_phase_location_array locations; -- enum vkd3d_result result = VKD3D_OK; -- unsigned int i; -- -- instructions = &flattener.instructions; -- -- flattener.phase = VKD3DSIH_INVALID; -- for (i = 0, locations.count = 0; i < instructions->count; ++i) -- flattener_eliminate_phase_related_dcls(&flattener, i, &locations); -- -- if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) -- return result; -- -- if (flattener.phase != VKD3DSIH_INVALID) -- { -- if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); -- } -- -- *src_instructions = flattener.instructions; -- return result; --} -- --struct control_point_normaliser --{ -- struct vkd3d_shader_instruction_array instructions; -- enum vkd3d_shader_opcode phase; -- struct vkd3d_shader_src_param 
*outpointid_param; --}; -- --static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) --{ -- return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; --} -- --struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -- struct vkd3d_shader_instruction_array *instructions) --{ -- struct vkd3d_shader_src_param *rel_addr; -- -- if (instructions->outpointid_param) -- return instructions->outpointid_param; -- -- if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) -- return NULL; -- -- vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); -- rel_addr->swizzle = 0; -- rel_addr->modifiers = 0; -- -- instructions->outpointid_param = rel_addr; -- return rel_addr; --} -- --static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, -- struct control_point_normaliser *normaliser) -+static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, -+ struct control_point_normaliser *normaliser) - { - struct vkd3d_shader_register *reg = &dst_param->reg; - -@@ -991,7 +1433,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p - } - - static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -- struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) -+ struct vsir_program *program, struct vsir_transformation_context *ctx) - { - struct vkd3d_shader_instruction_array *instructions; - struct control_point_normaliser normaliser; -@@ -1001,12 +1443,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - enum vkd3d_result ret; - unsigned int i, j; - -- if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); -+ -+ if (program->shader_version.type != 
VKD3D_SHADER_TYPE_HULL) -+ { -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; -+ return VKD3D_OK; -+ } -+ -+ if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) - { - ERR("Failed to allocate src param.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } -- normaliser.instructions = *src_instructions; -+ normaliser.instructions = program->instructions; - instructions = &normaliser.instructions; - normaliser.phase = VKD3DSIH_INVALID; - -@@ -1043,22 +1493,25 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - input_control_point_count = ins->declaration.count; - break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: -- *src_instructions = normaliser.instructions; -+ program->instructions = normaliser.instructions; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return VKD3D_OK; - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - /* ins may be relocated if the instruction array expands. 
*/ - location = ins->location; -- ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, -+ ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, - input_control_point_count, i, &location); -- *src_instructions = normaliser.instructions; -+ program->instructions = normaliser.instructions; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return ret; - default: - break; - } - } - -- *src_instructions = normaliser.instructions; -+ program->instructions = normaliser.instructions; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return VKD3D_OK; - } - -@@ -1098,36 +1551,35 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; - } - --static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, -- unsigned int reg_idx, unsigned int write_mask) -+static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, -+ unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) - { -- unsigned int i, base_write_mask; -+ const struct signature_element *e; -+ unsigned int i; - - for (i = 0; i < signature->element_count; ++i) - { -- struct signature_element *e = &signature->elements[i]; -- if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx -+ e = &signature->elements[i]; -+ if (e->register_index <= reg_idx && e->register_count > reg_idx - e->register_index - && (e->mask & write_mask) == write_mask) - { -- return i; -+ *element_idx = i; -+ return true; - } - } - -- /* Validated in the TPF reader, but failure in signature_element_range_expand_mask() -- * can land us here on an unmatched vector mask. 
*/ -- FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n", -- reg_idx, write_mask); -- base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); -- if (base_write_mask != write_mask) -- return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask); -- -- vkd3d_unreachable(); -+ return false; - } - - struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature, - unsigned int reg_idx, unsigned int write_mask) - { -- return &signature->elements[shader_signature_find_element_for_reg(signature, reg_idx, write_mask)]; -+ unsigned int element_idx; -+ -+ if (shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) -+ return &signature->elements[element_idx]; -+ -+ return NULL; - } - - static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], -@@ -1181,9 +1633,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, - { - const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; - const struct vkd3d_shader_register *reg = &range->dst.reg; -- unsigned int reg_idx, write_mask, element_idx; - const struct shader_signature *signature; - uint8_t (*range_map)[VKD3D_VEC4_SIZE]; -+ struct signature_element *element; -+ unsigned int reg_idx, write_mask; - - switch (reg->type) - { -@@ -1215,9 +1668,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, - - reg_idx = reg->idx[reg->idx_count - 1].offset; - write_mask = range->dst.write_mask; -- element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); -- range_map_set_register_range(range_map, reg_idx, range->register_count, -- signature->elements[element_idx].mask, true); -+ element = vsir_signature_find_element_for_reg(signature, reg_idx, write_mask); -+ range_map_set_register_range(range_map, reg_idx, range->register_count, element->mask, true); - } - - static int 
signature_element_mask_compare(const void *a, const void *b) -@@ -1276,8 +1728,34 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa - static int signature_element_register_compare(const void *a, const void *b) - { - const struct signature_element *e = a, *f = b; -+ int ret; -+ -+ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) -+ return ret; - -- return vkd3d_u32_compare(e->register_index, f->register_index); -+ /* System values like SV_RenderTargetArrayIndex and SV_ViewPortArrayIndex -+ * can get packed into the same I/O register as non-system values, but -+ * only at the end. E.g.: -+ * -+ * vs_4_0 -+ * ... -+ * .output -+ * ... -+ * .param B.x, o1.x, uint -+ * .param C.y, o1.y, uint -+ * .param SV_RenderTargetArrayIndex.z, o1.z, uint, RTINDEX -+ * .text -+ * ... -+ * mov o1.xy, v1.xyxx -+ * mov o1.z, v1.z -+ * ret -+ * -+ * Because I/O normalisation doesn't split writes like the mov to o1.xy -+ * above, we want to make sure that o1.x and o1.y continue to be packed -+ * into a single register after I/O normalisation, so we order system -+ * values after non-system values here, allowing the non-system values to -+ * get merged into a single register. 
*/ -+ return vkd3d_u32_compare(f->sysval_semantic, e->sysval_semantic); - } - - static int signature_element_index_compare(const void *a, const void *b) -@@ -1345,6 +1823,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - return false; - memcpy(elements, s->elements, element_count * sizeof(*elements)); - -+ for (i = 0; i < element_count; ++i) -+ elements[i].sort_index = i; -+ - qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); - - for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) -@@ -1388,6 +1869,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - else - e->interpolation_mode = f->interpolation_mode; - } -+ -+ vkd3d_free((void *)f->semantic_name); - } - } - element_count = new_count; -@@ -1415,6 +1898,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); - e->register_count = register_count; - e->mask = signature_element_range_expand_mask(e, register_count, range_map); -+ -+ for (j = 1; j < register_count; ++j) -+ { -+ f = &elements[i + j]; -+ vkd3d_free((void *)f->semantic_name); -+ } - } - } - element_count = new_count; -@@ -1470,6 +1959,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - const struct shader_signature *signature; - const struct signature_element *e; - -+ write_mask = dst_param->write_mask; -+ - switch (reg->type) - { - case VKD3DSPR_OUTPUT: -@@ -1518,10 +2009,17 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - break; - - case VKD3DSPR_RASTOUT: -+ /* Leave point size as a system value for the backends to consume. 
*/ -+ if (reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ return true; - reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; - signature = normaliser->output_signature; - reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; -+ /* Fog and point size are scalar, but fxc/d3dcompiler emits a full -+ * write mask when writing to them. */ -+ if (reg->idx[0].offset > 0) -+ write_mask = VKD3DSP_WRITEMASK_0; - break; - - default: -@@ -1529,11 +2027,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - } - - id_idx = reg->idx_count - 1; -- write_mask = dst_param->write_mask; -- element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); -+ if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) -+ vkd3d_unreachable(); - e = &signature->elements[element_idx]; - -- dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask); - if (is_io_dcl) - { - /* Validated in the TPF reader. 
*/ -@@ -1653,7 +2150,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par - - id_idx = reg->idx_count - 1; - write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0); -- element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); -+ if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) -+ vkd3d_unreachable(); - - e = &signature->elements[element_idx]; - if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) -@@ -1725,38 +2223,14 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - } - } - --static bool use_flat_interpolation(const struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) --{ -- static const struct vkd3d_shader_location no_loc; -- const struct vkd3d_shader_parameter1 *parameter; -- -- if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) -- return false; -- -- if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -- { -- vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "Unsupported flat interpolation parameter type %#x.\n", parameter->type); -- return false; -- } -- if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -- { -- vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -- "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); -- return false; -- } -- -- return parameter->u.immediate_constant.u.u32; --} -- - static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vsir_transformation_context *ctx) - { - struct io_normaliser normaliser = {program->instructions}; - struct vkd3d_shader_instruction *ins; -- bool has_control_point_phase; -- unsigned int 
i, j; -+ unsigned int i; -+ -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); - - normaliser.phase = VKD3DSIH_INVALID; - normaliser.shader_type = program->shader_version.type; -@@ -1765,7 +2239,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - normaliser.output_signature = &program->output_signature; - normaliser.patch_constant_signature = &program->patch_constant_signature; - -- for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { - ins = &program->instructions.elements[i]; - -@@ -1779,8 +2253,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - vkd3d_shader_instruction_make_nop(ins); - break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: -- has_control_point_phase = true; -- /* fall through */ - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - normaliser.phase = ins->opcode; -@@ -1790,22 +2262,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - } - } - -- if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) -- { -- /* Inputs and outputs must match for the default phase, so merge ranges must match too. 
*/ -- for (i = 0; i < MAX_REG_OUTPUT; ++i) -- { -- for (j = 0; j < VKD3D_VEC4_SIZE; ++j) -- { -- if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) -- normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; -- else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) -- normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; -- else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); -- } -- } -- } -- - if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) - || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) - || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) -@@ -1814,24 +2270,13 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -- if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL -- && program->shader_version.major < 4 && use_flat_interpolation(program, message_context)) -- { -- for (i = 0; i < program->input_signature.element_count; ++i) -- { -- struct signature_element *element = &program->input_signature.elements[i]; -- -- if (!ascii_strcasecmp(element->semantic_name, "COLOR")) -- element->interpolation_mode = VKD3DSIM_CONSTANT; -- } -- } -- - normaliser.phase = VKD3DSIH_INVALID; - for (i = 0; i < normaliser.instructions.count; ++i) - shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); - - program->instructions = normaliser.instructions; - program->use_vocp = normaliser.use_vocp; -+ program->normalisation_level = VSIR_FULLY_NORMALISED_IO; - return VKD3D_OK; - } - -@@ -1918,7 +2363,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par - param->reg.idx_count = 3; - } - --static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_program *program) -+static enum 
vkd3d_result vsir_program_normalise_flat_constants(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { - struct flat_constants_normaliser normaliser = {0}; - unsigned int i, j; -@@ -1957,7 +2403,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ - return VKD3D_OK; - } - --static void remove_dead_code(struct vsir_program *program) -+static enum vkd3d_result vsir_program_remove_dead_code(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { - size_t i, depth = 0; - bool dead = false; -@@ -2045,103 +2492,6 @@ static void remove_dead_code(struct vsir_program *program) - break; - } - } --} -- --static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) --{ -- unsigned int i; -- -- for (i = 0; i < program->instructions.count; ++i) -- { -- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -- struct vkd3d_shader_src_param *srcs; -- -- switch (ins->opcode) -- { -- case VKD3DSIH_TEX: -- if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- memset(srcs, 0, sizeof(*srcs) * 3); -- -- ins->opcode = VKD3DSIH_SAMPLE; -- -- srcs[0] = ins->src[0]; -- -- srcs[1].reg.type = VKD3DSPR_RESOURCE; -- srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; -- srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; -- srcs[1].reg.idx_count = 2; -- srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; -- srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; -- srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -- -- srcs[2].reg.type = VKD3DSPR_SAMPLER; -- srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; -- srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; -- srcs[2].reg.idx_count = 2; -- srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; -- -- ins->src = srcs; -- ins->src_count = 3; -- break; -- -- case VKD3DSIH_TEXLDD: -- if (!(srcs = 
shader_src_param_allocator_get(&program->instructions.src_params, 5))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- memset(srcs, 0, sizeof(*srcs) * 5); -- -- ins->opcode = VKD3DSIH_SAMPLE_GRAD; -- -- srcs[0] = ins->src[0]; -- -- srcs[1].reg.type = VKD3DSPR_RESOURCE; -- srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; -- srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; -- srcs[1].reg.idx_count = 2; -- srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; -- srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; -- srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -- -- srcs[2].reg.type = VKD3DSPR_SAMPLER; -- srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; -- srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; -- srcs[2].reg.idx_count = 2; -- srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; -- -- srcs[3] = ins->src[2]; -- srcs[4] = ins->src[3]; -- -- ins->src = srcs; -- ins->src_count = 5; -- break; -- -- case VKD3DSIH_TEXBEM: -- case VKD3DSIH_TEXBEML: -- case VKD3DSIH_TEXCOORD: -- case VKD3DSIH_TEXDEPTH: -- case VKD3DSIH_TEXDP3: -- case VKD3DSIH_TEXDP3TEX: -- case VKD3DSIH_TEXLDL: -- case VKD3DSIH_TEXM3x2PAD: -- case VKD3DSIH_TEXM3x2TEX: -- case VKD3DSIH_TEXM3x3DIFF: -- case VKD3DSIH_TEXM3x3PAD: -- case VKD3DSIH_TEXM3x3SPEC: -- case VKD3DSIH_TEXM3x3TEX: -- case VKD3DSIH_TEXM3x3VSPEC: -- case VKD3DSIH_TEXREG2AR: -- case VKD3DSIH_TEXREG2GB: -- case VKD3DSIH_TEXREG2RGB: -- vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "Aborting due to not yet implemented feature: " -- "Combined sampler instruction %#x.", ins->opcode); -- return VKD3D_ERROR_NOT_IMPLEMENTED; -- -- default: -- break; -- } -- } - - return VKD3D_OK; - } -@@ -2434,15 +2784,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla - static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, - struct vkd3d_shader_message_context *message_context) - { -- bool main_block_open, is_hull_shader, after_declarations_section; - struct 
vkd3d_shader_instruction_array *instructions; - struct vsir_program *program = flattener->program; -+ bool is_hull_shader, after_declarations_section; - struct vkd3d_shader_instruction *dst_ins; - size_t i; - - instructions = &program->instructions; - is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL; -- main_block_open = !is_hull_shader; - after_declarations_section = is_hull_shader; - - if (!cf_flattener_require_space(flattener, instructions->count + 1)) -@@ -2766,8 +3115,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - - if (cf_info) - cf_info->inside_block = false; -- else -- main_block_open = false; - break; - - default: -@@ -2777,23 +3124,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - } - } - -- if (main_block_open) -- { -- if (!(dst_ins = cf_flattener_require_space(flattener, 1))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET); -- ++flattener->instruction_count; -- } -- - return flattener->status; - } - - static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vsir_transformation_context *ctx) - { -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; - struct cf_flattener flattener = {.program = program}; - enum vkd3d_result result; - -+ VKD3D_ASSERT(program->cf_type == VSIR_CF_STRUCTURED); -+ - if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) - { - vkd3d_free(program->instructions.elements); -@@ -2801,6 +3143,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi - program->instructions.capacity = flattener.instruction_capacity; - program->instructions.count = flattener.instruction_count; - program->block_count = flattener.block_id; -+ program->cf_type = VSIR_CF_BLOCKS; - } - else - { -@@ 
-2860,13 +3203,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i - return true; - } - --static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) -+static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { - unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label; - size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0; - struct vkd3d_shader_instruction *instructions = NULL; - struct lower_switch_to_if_ladder_block_mapping *block_map = NULL; - -+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); -+ - if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; - -@@ -3050,7 +3396,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl - vkd3d_free(block_info); - } - --static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) -+static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { - size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; - struct ssas_to_temps_block_info *info, *block_info = NULL; -@@ -3058,6 +3405,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - struct ssas_to_temps_alloc alloc = {0}; - unsigned int current_label = 0; - -+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); -+ - if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) - { - ERR("Failed to allocate block info array.\n"); -@@ -5271,12 +5620,15 @@ out: - } - - static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vsir_transformation_context *ctx) - { -+ struct vkd3d_shader_message_context *message_context = 
ctx->message_context; - struct vsir_cfg_emit_target target = {0}; - enum vkd3d_result ret; - size_t i; - -+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); -+ - target.jump_target_temp_idx = program->temp_count; - target.temp_count = program->temp_count + 1; - -@@ -5324,6 +5676,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - program->instructions.capacity = target.ins_capacity; - program->instructions.count = target.ins_count; - program->temp_count = target.temp_count; -+ program->cf_type = VSIR_CF_STRUCTURED; - - return VKD3D_OK; - -@@ -5451,11 +5804,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f - } - - static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vsir_transformation_context *ctx) - { -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; - enum vkd3d_result ret; - size_t i; - -+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); -+ - for (i = 0; i < program->instructions.count;) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -@@ -5491,26 +5847,67 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - return VKD3D_OK; - } - --static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) -+static bool use_flat_interpolation(const struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, bool *flat) - { -- for (unsigned int i = 0; i < signature->element_count; ++i) -+ static const struct vkd3d_shader_location no_loc; -+ const struct vkd3d_shader_parameter1 *parameter; -+ -+ *flat = false; -+ -+ if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) -+ return true; -+ -+ if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { -- if 
(signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET -- && !signature->elements[i].register_index) -- { -- *index = i; -- return true; -- } -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported flat interpolation parameter type %#x.", parameter->type); -+ return false; -+ } -+ if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid flat interpolation parameter data type %#x.", parameter->data_type); -+ return false; - } - -- return false; -+ *flat = parameter->u.immediate_constant.u.u32; -+ return true; -+} -+ -+static enum vkd3d_result vsir_program_apply_flat_interpolation(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ unsigned int i; -+ bool flat; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL || program->shader_version.major >= 4) -+ return VKD3D_OK; -+ -+ if (!use_flat_interpolation(program, ctx->message_context, &flat)) -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ -+ if (!flat) -+ return VKD3D_OK; -+ -+ for (i = 0; i < program->input_signature.element_count; ++i) -+ { -+ struct signature_element *element = &program->input_signature.elements[i]; -+ -+ if (!ascii_strcasecmp(element->semantic_name, "COLOR")) -+ element->interpolation_mode = VKD3DSIM_CONSTANT; -+ } -+ -+ return VKD3D_OK; - } - - static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, - const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, -- const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) -+ const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, -+ uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_instruction_array *instructions = 
&program->instructions; -+ static const struct vkd3d_shader_location no_loc; - size_t pos = ret - instructions->elements; - struct vkd3d_shader_instruction *ins; - -@@ -5565,6 +5962,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); - break; - -+ case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4: -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, -+ "Alpha test reference data type must be a single component."); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ - default: - FIXME("Unhandled parameter data type %#x.\n", ref->data_type); - return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -5596,12 +5998,13 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - } - - static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vsir_transformation_context *ctx) - { -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; - const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; -+ uint32_t colour_signature_idx, colour_temp = ~0u; - static const struct vkd3d_shader_location no_loc; - enum vkd3d_shader_comparison_func compare_func; -- uint32_t colour_signature_idx, colour_temp; - struct vkd3d_shader_instruction *ins; - size_t new_pos; - int ret; -@@ -5609,7 +6012,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro - if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - return VKD3D_OK; - -- if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) -+ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx) - || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) - return VKD3D_OK; - -@@ -5620,13 +6023,13 @@ static enum vkd3d_result 
vsir_program_insert_alpha_test(struct vsir_program *pro - if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "Unsupported alpha test function parameter type %#x.\n", func->type); -+ "Unsupported alpha test function parameter type %#x.", func->type); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) - { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -- "Invalid alpha test function parameter data type %#x.\n", func->data_type); -+ "Invalid alpha test function parameter data type %#x.", func->data_type); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - compare_func = func->u.immediate_constant.u.u32; -@@ -5650,7 +6053,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro - if (ins->opcode == VKD3DSIH_RET) - { - if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, -- ref, colour_signature_idx, colour_temp, &new_pos)) < 0) -+ ref, colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) - return ret; - i = new_pos; - continue; -@@ -5677,456 +6080,2081 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro - return VKD3D_OK; - } - --struct validation_context -+static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, uint32_t mask, uint32_t position_signature_idx, -+ uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos) - { -- struct vkd3d_shader_message_context *message_context; -- const struct vsir_program *program; -- size_t instruction_idx; -- struct vkd3d_shader_location null_location; -- bool invalid_instruction_idx; -- enum vkd3d_result status; -- bool dcl_temps_found; -- enum vkd3d_shader_opcode phase; -- enum cf_type -- { -- CF_TYPE_UNKNOWN = 0, -- 
CF_TYPE_STRUCTURED, -- CF_TYPE_BLOCKS, -- } cf_type; -- bool inside_block; -- -- struct validation_context_temp_data -- { -- enum vsir_dimension dimension; -- size_t first_seen; -- } *temps; -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int output_idx = 0; - -- struct validation_context_ssa_data -- { -- enum vsir_dimension dimension; -- enum vkd3d_data_type data_type; -- size_t first_seen; -- uint32_t write_mask; -- uint32_t read_mask; -- size_t first_assigned; -- } *ssas; -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- enum vkd3d_shader_opcode *blocks; -- size_t depth; -- size_t blocks_capacity; --}; -+ ins = &program->instructions.elements[pos]; - --static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, -- enum vkd3d_shader_error error, const char *format, ...) 
--{ -- struct vkd3d_string_buffer buf; -- va_list args; -+ for (unsigned int i = 0; i < 8; ++i) -+ { -+ if (!(mask & (1u << i))) -+ continue; - -- vkd3d_string_buffer_init(&buf); -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); -+ src_param_init_temp_float4(&ins->src[0], position_temp); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); -+ ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - -- va_start(args, format); -- vkd3d_string_buffer_vprintf(&buf, format, args); -- va_end(args); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); -+ if (output_idx < 4) -+ ins->dst[0].reg.idx[0].offset = low_signature_idx; -+ else -+ ins->dst[0].reg.idx[0].offset = high_signature_idx; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].write_mask = (1u << (output_idx % 4)); -+ ++output_idx; - -- if (ctx->invalid_instruction_idx) -- { -- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); -- ERR("VSIR validation error: %s\n", buf.buffer); -- } -- else -- { -- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -- vkd3d_shader_error(ctx->message_context, &ins->location, error, -- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -- ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); -+ ++ins; - } - -- vkd3d_string_buffer_cleanup(&buf); -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = position_signature_idx; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].write_mask = program->output_signature.elements[position_signature_idx].mask; -+ src_param_init_temp_float(&ins->src[0], position_temp); -+ 
ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; - -- if (!ctx->status) -- ctx->status = VKD3D_ERROR_INVALID_SHADER; -+ *ret_pos = pos + vkd3d_popcount(mask) + 1; -+ return VKD3D_OK; - } - --static void vsir_validate_src_param(struct validation_context *ctx, -- const struct vkd3d_shader_src_param *src); -- --static void vsir_validate_register(struct validation_context *ctx, -- const struct vkd3d_shader_register *reg) -+static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { -- unsigned int i; -- -- if (reg->type >= VKD3DSPR_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", -- reg->type); -+ struct shader_signature *signature = &program->output_signature; -+ unsigned int low_signature_idx = ~0u, high_signature_idx = ~0u; -+ const struct vkd3d_shader_parameter1 *mask_parameter = NULL; -+ struct signature_element *new_elements, *clip_element; -+ uint32_t position_signature_idx, position_temp, mask; -+ static const struct vkd3d_shader_location no_loc; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int plane_count; -+ size_t new_pos; -+ int ret; - -- if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.", -- reg->precision); -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) -+ return VKD3D_OK; - -- if (reg->data_type >= VKD3D_DATA_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.", -- reg->data_type); -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - -- if (reg->dimension >= VSIR_DIMENSION_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.", -- 
reg->dimension); -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK) -+ mask_parameter = parameter; -+ } - -- if (reg->idx_count > ARRAY_SIZE(reg->idx)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.", -- reg->idx_count); -+ if (!mask_parameter) -+ return VKD3D_OK; - -- for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) -+ if (mask_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { -- const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; -- if (reg->idx[i].rel_addr) -- vsir_validate_src_param(ctx, param); -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported clip plane mask parameter type %#x.", mask_parameter->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; - } -+ if (mask_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid clip plane mask parameter data type %#x.", mask_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ mask = mask_parameter->u.immediate_constant.u.u32; - -- switch (reg->type) -+ if (!mask) -+ return VKD3D_OK; -+ -+ for (unsigned int i = 0; i < signature->element_count; ++i) - { -- case VKD3DSPR_TEMP: -+ if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_CLIP_DISTANCE) - { -- struct validation_context_temp_data *data; -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, -+ "Clip planes cannot be used if the shader writes clip distance."); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ } - -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.", -- reg->idx_count); -- break; -- } -+ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx)) -+ { -+ 
vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, -+ "Shader does not write position."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); -+ /* Append the clip plane signature indices. */ - -- if (reg->idx[0].offset >= ctx->program->temp_count) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->program->temp_count); -- break; -- } -+ plane_count = vkd3d_popcount(mask); - -- data = &ctx->temps[reg->idx[0].offset]; -+ if (!(new_elements = vkd3d_realloc(signature->elements, -+ (signature->element_count + 2) * sizeof(*signature->elements)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ signature->elements = new_elements; -+ -+ low_signature_idx = signature->element_count; -+ clip_element = &signature->elements[signature->element_count++]; -+ memset(clip_element, 0, sizeof(*clip_element)); -+ clip_element->sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE; -+ clip_element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ clip_element->register_count = 1; -+ clip_element->mask = vkd3d_write_mask_from_component_count(min(plane_count, 4)); -+ clip_element->used_mask = clip_element->mask; -+ clip_element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; -+ -+ if (plane_count > 4) -+ { -+ high_signature_idx = signature->element_count; -+ clip_element = &signature->elements[signature->element_count++]; -+ memset(clip_element, 0, sizeof(*clip_element)); -+ clip_element->sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE; -+ clip_element->semantic_index = 1; -+ clip_element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ clip_element->register_count = 1; -+ clip_element->mask = vkd3d_write_mask_from_component_count(plane_count - 4); -+ clip_element->used_mask = clip_element->mask; -+ 
clip_element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; -+ } -+ -+ /* We're going to be reading from the output position, so we need to go -+ * through the whole shader and convert it to a temp. */ - -- if (reg->dimension == VSIR_DIMENSION_NONE) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register."); -- break; -- } -+ position_temp = program->temp_count++; - -- /* TEMP registers can be scalar or vec4, provided that -- * each individual register always appears with the same -- * dimension. */ -- if (data->dimension == VSIR_DIMENSION_NONE) -- { -- data->dimension = reg->dimension; -- data->first_seen = ctx->instruction_idx; -- } -- else if (data->dimension != reg->dimension) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: " -- "it has already been seen with dimension %#x at instruction %zu.", -- reg->dimension, data->dimension, data->first_seen); -- } -- break; -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ if ((ret = insert_clip_planes_before_ret(program, ins, mask, position_signature_idx, -+ position_temp, low_signature_idx, high_signature_idx, &new_pos)) < 0) -+ return ret; -+ i = new_pos; -+ continue; - } - -- case VKD3DSPR_SSA: -+ for (size_t j = 0; j < ins->dst_count; ++j) - { -- struct validation_context_ssa_data *data; -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; - -- if (reg->idx_count != 1) -+ /* Note we run after I/O normalization. 
*/ -+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == position_signature_idx) - { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.", -- reg->idx_count); -- break; -+ dst->reg.type = VKD3DSPR_TEMP; -+ dst->reg.idx[0].offset = position_temp; - } -+ } -+ } - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register."); -+ return VKD3D_OK; -+} - -- if (reg->idx[0].offset >= ctx->program->ssa_count) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "SSA register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->program->ssa_count); -- break; -- } -+static bool is_pre_rasterization_shader(enum vkd3d_shader_type type) -+{ -+ return type == VKD3D_SHADER_TYPE_VERTEX -+ || type == VKD3D_SHADER_TYPE_HULL -+ || type == VKD3D_SHADER_TYPE_DOMAIN -+ || type == VKD3D_SHADER_TYPE_GEOMETRY; -+} - -- data = &ctx->ssas[reg->idx[0].offset]; -+static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, size_t *ret_pos) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; - -- if (reg->dimension == VSIR_DIMENSION_NONE) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register."); -- break; -- } -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- /* SSA registers can be scalar or vec4, provided that each -- * individual register always appears with the same -- * dimension. 
*/ -- if (data->dimension == VSIR_DIMENSION_NONE) -- { -- data->dimension = reg->dimension; -- data->data_type = reg->data_type; -- data->first_seen = ctx->instruction_idx; -- } -- else -- { -- if (data->dimension != reg->dimension) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: " -- "it has already been seen with dimension %#x at instruction %zu.", -- reg->dimension, data->dimension, data->first_seen); -- -- if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: " -- "it has already been seen with data type %#x at instruction %zu.", -- reg->data_type, data->data_type, data->first_seen); -- } -- break; -- } -+ ins = &program->instructions.elements[pos]; - -- case VKD3DSPR_LABEL: -- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", -- reg->precision); -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); - -- if (reg->data_type != VKD3D_DATA_UNUSED) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", -- reg->data_type); -+ *ret_pos = pos + 1; -+ return VKD3D_OK; -+} - -- if (reg->dimension != VSIR_DIMENSION_NONE) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.", -- reg->dimension); -+static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ const struct vkd3d_shader_parameter1 
*size_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; - -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.", -- reg->idx_count); -- break; -- } -+ if (program->has_point_size) -+ return VKD3D_OK; - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register."); -- -- /* Index == 0 is invalid, but it is temporarily allowed -- * for intermediate stages. Once we support validation -- * dialects we can selectively check for that. */ -- if (reg->idx[0].offset > ctx->program->block_count) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "LABEL register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->program->block_count); -- break; -+ if (!is_pre_rasterization_shader(program->shader_version.type)) -+ return VKD3D_OK; - -- case VKD3DSPR_NULL: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.", -- reg->idx_count); -- break; -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - -- case VKD3DSPR_IMMCONST: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.", -- reg->idx_count); -- break; -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE) -+ size_parameter = parameter; -+ } - -- case VKD3DSPR_IMMCONST64: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.", -- reg->idx_count); -- break; -+ if (!size_parameter) -+ return VKD3D_OK; - -- default: -- break; -+ if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ { -+ 
vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid point size parameter data type %#x.", size_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ program->has_point_size = true; -+ -+ /* Append a point size write before each ret. */ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ size_t new_pos; -+ int ret; -+ -+ if ((ret = insert_point_size_before_ret(program, ins, &new_pos)) < 0) -+ return ret; -+ i = new_pos; -+ } - } -+ -+ return VKD3D_OK; - } - --static void vsir_validate_dst_param(struct validation_context *ctx, -- const struct vkd3d_shader_dst_param *dst) -+static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) - { -- vsir_validate_register(ctx, &dst->reg); -+ const struct vkd3d_shader_parameter1 *min_parameter = NULL, *max_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; - -- if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", -- dst->write_mask); -+ if (!program->has_point_size) -+ return VKD3D_OK; - -- switch (dst->reg.dimension) -- { -- case VSIR_DIMENSION_SCALAR: -- if (dst->write_mask != VKD3DSP_WRITEMASK_0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", -- dst->write_mask); -- break; -+ if (!is_pre_rasterization_shader(program->shader_version.type)) -+ return VKD3D_OK; - -- case VSIR_DIMENSION_VEC4: -- if (dst->write_mask == 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask."); -- break; -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter 
= &program->parameters[i]; - -- default: -- if (dst->write_mask != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.", -- dst->reg.dimension, dst->write_mask); -- break; -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN) -+ min_parameter = parameter; -+ else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX) -+ max_parameter = parameter; - } - -- if (dst->modifiers & ~VKD3DSPDM_MASK) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", -- dst->modifiers); -+ if (!min_parameter && !max_parameter) -+ return VKD3D_OK; - -- switch (dst->shift) -+ if (min_parameter && min_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) - { -- case 0: -- case 1: -- case 2: -- case 3: -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid minimum point size parameter data type %#x.", min_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ if (max_parameter && max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid maximum point size parameter data type %#x.", max_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ /* Replace writes to the point size by inserting a clamp before each write. */ -+ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ const struct vkd3d_shader_location *loc; -+ unsigned int ssa_value; -+ bool clamp = false; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ for (size_t j = 0; j < ins->dst_count; ++j) -+ { -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; -+ -+ /* Note we run after I/O normalization. 
*/ -+ if (dst->reg.type == VKD3DSPR_RASTOUT) -+ { -+ dst_param_init_ssa_float(dst, program->ssa_count); -+ ssa_value = program->ssa_count++; -+ clamp = true; -+ } -+ } -+ -+ if (!clamp) -+ continue; -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ loc = &program->instructions.elements[i].location; -+ ins = &program->instructions.elements[i + 1]; -+ -+ if (min_parameter) -+ { -+ vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MAX, 1, 2); -+ src_param_init_ssa_float(&ins->src[0], ssa_value); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, VKD3D_DATA_FLOAT); -+ if (max_parameter) -+ { -+ dst_param_init_ssa_float(&ins->dst[0], program->ssa_count); -+ ssa_value = program->ssa_count++; -+ } -+ else -+ { -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ } -+ ++ins; -+ ++i; -+ } -+ -+ if (max_parameter) -+ { -+ vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MIN, 1, 2); -+ src_param_init_ssa_float(&ins->src[0], ssa_value); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_DATA_FLOAT); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ -+ ++i; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static bool has_texcoord_signature_element(const struct shader_signature *signature) -+{ -+ for (size_t i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(signature->elements[i].semantic_name, "TEXCOORD")) -+ return true; -+ } -+ return false; -+} -+ -+/* Returns true if replacement was done. 
*/ -+static bool replace_texcoord_with_point_coord(struct vsir_program *program, -+ struct vkd3d_shader_src_param *src, unsigned int coord_temp) -+{ -+ uint32_t prev_swizzle = src->swizzle; -+ const struct signature_element *e; -+ -+ /* The input semantic may have a nontrivial mask, which we need to -+ * correct for. E.g. if the mask is .yz, and we read from .y, that needs -+ * to become .x. */ -+ static const uint32_t inverse_swizzles[16] = -+ { -+ /* Use _ for "undefined" components, for clarity. */ -+#define VKD3D_SHADER_SWIZZLE__ VKD3D_SHADER_SWIZZLE_X -+ 0, -+ /* .x */ VKD3D_SHADER_SWIZZLE(X, _, _, _), -+ /* .y */ VKD3D_SHADER_SWIZZLE(_, X, _, _), -+ /* .xy */ VKD3D_SHADER_SWIZZLE(X, Y, _, _), -+ /* .z */ VKD3D_SHADER_SWIZZLE(_, _, X, _), -+ /* .xz */ VKD3D_SHADER_SWIZZLE(X, _, Y, _), -+ /* .yz */ VKD3D_SHADER_SWIZZLE(_, X, Y, _), -+ /* .xyz */ VKD3D_SHADER_SWIZZLE(X, Y, Z, _), -+ /* .w */ VKD3D_SHADER_SWIZZLE(_, _, _, X), -+ /* .xw */ VKD3D_SHADER_SWIZZLE(X, _, _, Y), -+ /* .yw */ VKD3D_SHADER_SWIZZLE(_, X, _, Y), -+ /* .xyw */ VKD3D_SHADER_SWIZZLE(X, Y, _, Z), -+ /* .zw */ VKD3D_SHADER_SWIZZLE(_, _, X, Y), -+ /* .xzw */ VKD3D_SHADER_SWIZZLE(X, _, Y, Z), -+ /* .yzw */ VKD3D_SHADER_SWIZZLE(_, X, Y, Z), -+ /* .xyzw */ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), -+#undef VKD3D_SHADER_SWIZZLE__ -+ }; -+ -+ if (src->reg.type != VKD3DSPR_INPUT) -+ return false; -+ e = &program->input_signature.elements[src->reg.idx[0].offset]; -+ -+ if (ascii_strcasecmp(e->semantic_name, "TEXCOORD")) -+ return false; -+ -+ src->reg.type = VKD3DSPR_TEMP; -+ src->reg.idx[0].offset = coord_temp; -+ -+ /* If the mask is already contiguous and zero-based, no need to remap -+ * the swizzle. 
*/ -+ if (!(e->mask & (e->mask + 1))) -+ return true; -+ -+ src->swizzle = 0; -+ for (unsigned int i = 0; i < 4; ++i) -+ { -+ src->swizzle |= vsir_swizzle_get_component(inverse_swizzles[e->mask], -+ vsir_swizzle_get_component(prev_swizzle, i)) << VKD3D_SHADER_SWIZZLE_SHIFT(i); -+ } -+ -+ return true; -+} -+ -+static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ const struct vkd3d_shader_parameter1 *sprite_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ struct vkd3d_shader_instruction *ins; -+ bool used_texcoord = false; -+ unsigned int coord_temp; -+ size_t i, insert_pos; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; -+ -+ for (i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE) -+ sprite_parameter = parameter; -+ } -+ -+ if (!sprite_parameter) -+ return VKD3D_OK; -+ -+ if (sprite_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported point sprite parameter type %#x.", sprite_parameter->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ if (sprite_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid point sprite parameter data type %#x.", sprite_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ if (!sprite_parameter->u.immediate_constant.u.u32) -+ return VKD3D_OK; -+ -+ if (!has_texcoord_signature_element(&program->input_signature)) -+ return VKD3D_OK; -+ -+ /* VKD3DSPR_POINTCOORD is a two-component value; fill the remaining two -+ * components with zeroes. 
*/ -+ coord_temp = program->temp_count++; -+ -+ /* Construct the new temp after all LABEL, DCL, and NOP instructions. -+ * We need to skip NOP instructions because they might result from removed -+ * DCLs, and there could still be DCLs after NOPs. */ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) -+ break; -+ } -+ -+ insert_pos = i; -+ -+ /* Replace each texcoord read with a read from the point coord. */ -+ for (; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ for (unsigned int j = 0; j < ins->src_count; ++j) -+ { -+ used_texcoord |= replace_texcoord_with_point_coord(program, &ins->src[j], coord_temp); -+ -+ for (unsigned int k = 0; k < ins->src[j].reg.idx_count; ++k) -+ { -+ if (ins->src[j].reg.idx[k].rel_addr) -+ used_texcoord |= replace_texcoord_with_point_coord(program, -+ ins->src[j].reg.idx[k].rel_addr, coord_temp); -+ } -+ } -+ -+ for (unsigned int j = 0; j < ins->dst_count; ++j) -+ { -+ for (unsigned int k = 0; k < ins->dst[j].reg.idx_count; ++k) -+ { -+ if (ins->dst[j].reg.idx[k].rel_addr) -+ used_texcoord |= replace_texcoord_with_point_coord(program, -+ ins->dst[j].reg.idx[k].rel_addr, coord_temp); -+ } -+ } -+ } -+ -+ if (used_texcoord) -+ { -+ if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[insert_pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_temp_float4(&ins->dst[0], coord_temp); -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].swizzle = 
VKD3D_SHADER_NO_SWIZZLE; -+ ++ins; -+ -+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_temp_float4(&ins->dst[0], coord_temp); -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3; -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ++ins; -+ -+ program->has_point_coord = true; -+ } -+ -+ return VKD3D_OK; -+} -+ -+struct validation_context -+{ -+ struct vkd3d_shader_message_context *message_context; -+ const struct vsir_program *program; -+ size_t instruction_idx; -+ struct vkd3d_shader_location null_location; -+ bool invalid_instruction_idx; -+ enum vkd3d_result status; -+ bool dcl_temps_found; -+ enum vkd3d_shader_opcode phase; -+ bool inside_block; -+ -+ struct validation_context_temp_data -+ { -+ enum vsir_dimension dimension; -+ size_t first_seen; -+ } *temps; -+ -+ struct validation_context_ssa_data -+ { -+ enum vsir_dimension dimension; -+ enum vkd3d_data_type data_type; -+ size_t first_seen; -+ uint32_t write_mask; -+ uint32_t read_mask; -+ size_t first_assigned; -+ } *ssas; -+ -+ enum vkd3d_shader_opcode *blocks; -+ size_t depth; -+ size_t blocks_capacity; -+ -+ unsigned int outer_tess_idxs[4]; -+ unsigned int inner_tess_idxs[2]; -+}; -+ -+static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, -+ enum vkd3d_shader_error error, const char *format, ...) 
-+{ -+ struct vkd3d_string_buffer buf; -+ va_list args; -+ -+ vkd3d_string_buffer_init(&buf); -+ -+ va_start(args, format); -+ vkd3d_string_buffer_vprintf(&buf, format, args); -+ va_end(args); -+ -+ if (ctx->invalid_instruction_idx) -+ { -+ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); -+ WARN("VSIR validation error: %s\n", buf.buffer); -+ } -+ else -+ { -+ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -+ vkd3d_shader_error(ctx->message_context, &ins->location, error, -+ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -+ WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); -+ } -+ -+ vkd3d_string_buffer_cleanup(&buf); -+ -+ if (!ctx->status) -+ ctx->status = VKD3D_ERROR_INVALID_SHADER; -+} -+ -+static void vsir_validate_register_without_indices(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+} -+ -+static void vsir_validate_io_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ const struct shader_signature *signature; -+ bool has_control_point = false; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_INPUT: -+ signature = &ctx->program->input_signature; -+ -+ switch (ctx->program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ has_control_point = true; -+ break; -+ -+ default: -+ break; -+ } -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ switch (ctx->program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE -+ || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -+ { -+ signature = 
&ctx->program->output_signature; -+ has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; -+ } -+ else -+ { -+ signature = &ctx->program->patch_constant_signature; -+ } -+ break; -+ -+ default: -+ signature = &ctx->program->output_signature; -+ break; -+ } -+ break; -+ -+ case VKD3DSPR_INCONTROLPOINT: -+ signature = &ctx->program->input_signature; -+ has_control_point = true; -+ break; -+ -+ case VKD3DSPR_OUTCONTROLPOINT: -+ signature = &ctx->program->output_signature; -+ has_control_point = true; -+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ signature = &ctx->program->patch_constant_signature; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) -+ { -+ /* Indices are [register] or [control point, register]. Both are -+ * allowed to have a relative address. */ -+ unsigned int expected_idx_count = 1 + !!has_control_point; -+ -+ if (reg->idx_count != expected_idx_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+ return; -+ } -+ } -+ else -+ { -+ struct signature_element *element; -+ unsigned int expected_idx_count; -+ unsigned int signature_idx; -+ bool is_array = false; -+ -+ /* If the signature element is not an array, indices are -+ * [signature] or [control point, signature]. If the signature -+ * element is an array, indices are [array, signature] or -+ * [control point, array, signature]. In any case `signature' is -+ * not allowed to have a relative address, while the others are. 
-+ */ -+ if (reg->idx_count < 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+ return; -+ } -+ -+ if (reg->idx[reg->idx_count - 1].rel_addr) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the signature index of a register of type %#x.", -+ reg->type); -+ return; -+ } -+ -+ signature_idx = reg->idx[reg->idx_count - 1].offset; -+ -+ if (signature_idx >= signature->element_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Signature index %u exceeds the signature size %u in a register of type %#x.", -+ signature_idx, signature->element_count, reg->type); -+ return; -+ } -+ -+ element = &signature->elements[signature_idx]; -+ if (element->register_count > 1 || vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) -+ is_array = true; -+ -+ expected_idx_count = 1 + !!has_control_point + !!is_array; -+ -+ if (reg->idx_count != expected_idx_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+ return; -+ } -+ } -+} -+ -+static void vsir_validate_temp_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ struct validation_context_temp_data *data; -+ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a TEMP register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a TEMP register."); -+ -+ if (reg->idx[0].offset >= ctx->program->temp_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "TEMP register index %u exceeds the maximum count %u.", -+ reg->idx[0].offset, ctx->program->temp_count); -+ 
return; -+ } -+ -+ data = &ctx->temps[reg->idx[0].offset]; -+ -+ if (reg->dimension == VSIR_DIMENSION_NONE) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension NONE for a TEMP register."); -+ return; -+ } -+ -+ /* TEMP registers can be scalar or vec4, provided that -+ * each individual register always appears with the same -+ * dimension. */ -+ if (data->dimension == VSIR_DIMENSION_NONE) -+ { -+ data->dimension = reg->dimension; -+ data->first_seen = ctx->instruction_idx; -+ } -+ else if (data->dimension != reg->dimension) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a TEMP register: " -+ "it has already been seen with dimension %#x at instruction %zu.", -+ reg->dimension, data->dimension, data->first_seen); -+ } -+} -+ -+static void vsir_validate_rastout_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a RASTOUT register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a RASTOUT register."); -+ -+ if (reg->idx[0].offset >= 3) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid offset for a RASTOUT register."); -+} -+ -+static void vsir_validate_misctype_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a MISCTYPE register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a MISCTYPE register."); -+ -+ if (reg->idx[0].offset >= 2) -+ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid offset for a MISCTYPE register."); -+} -+ -+static void vsir_validate_label_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a LABEL register.", reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a LABEL register.", reg->data_type); -+ -+ if (reg->dimension != VSIR_DIMENSION_NONE) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a LABEL register.", reg->dimension); -+ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a LABEL register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a LABEL register."); -+ -+ /* Index == 0 is invalid, but it is temporarily allowed -+ * for intermediate stages. Once we support validation -+ * dialects we can selectively check for that. 
*/ -+ if (reg->idx[0].offset > ctx->program->block_count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "LABEL register index %u exceeds the maximum count %u.", -+ reg->idx[0].offset, ctx->program->block_count); -+} -+ -+static void vsir_validate_sampler_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a SAMPLER register.", reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a SAMPLER register.", reg->data_type); -+ -+ /* VEC4 is allowed in gather operations. */ -+ if (reg->dimension == VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension SCALAR for a SAMPLER register."); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a SAMPLER register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a SAMPLER register."); -+} -+ -+static void vsir_validate_resource_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a RESOURCE register.", reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a RESOURCE register.", reg->data_type); -+ -+ if (reg->dimension != VSIR_DIMENSION_VEC4) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x 
for a RESOURCE register.", reg->dimension); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a RESOURCE register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a RESOURCE register."); -+} -+ -+static void vsir_validate_uav_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a UAV register.", -+ reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a UAV register.", -+ reg->data_type); -+ -+ /* NONE is allowed in counter operations. */ -+ if (reg->dimension == VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a UAV register.", -+ reg->dimension); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a UAV register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a UAV register."); -+} -+ -+static void vsir_validate_ssa_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ struct validation_context_ssa_data *data; -+ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a SSA register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative 
address for a SSA register."); -+ -+ if (reg->idx[0].offset >= ctx->program->ssa_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "SSA register index %u exceeds the maximum count %u.", -+ reg->idx[0].offset, ctx->program->ssa_count); -+ return; -+ } -+ -+ data = &ctx->ssas[reg->idx[0].offset]; -+ -+ if (reg->dimension == VSIR_DIMENSION_NONE) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension NONE for a SSA register."); -+ return; -+ } -+ -+ /* SSA registers can be scalar or vec4, provided that each -+ * individual register always appears with the same -+ * dimension. */ -+ if (data->dimension == VSIR_DIMENSION_NONE) -+ { -+ data->dimension = reg->dimension; -+ data->data_type = reg->data_type; -+ data->first_seen = ctx->instruction_idx; -+ } -+ else -+ { -+ if (data->dimension != reg->dimension) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a SSA register: " -+ "it has already been seen with dimension %#x at instruction %zu.", -+ reg->dimension, data->dimension, data->first_seen); -+ -+ if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a SSA register: " -+ "it has already been seen with data type %#x at instruction %zu.", -+ reg->data_type, data->data_type, data->first_seen); -+ } -+} -+ -+static void vsir_validate_src_param(struct validation_context *ctx, -+ const struct vkd3d_shader_src_param *src); -+ -+static void vsir_validate_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ unsigned int i; -+ -+ if (reg->type >= VKD3DSPR_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", -+ reg->type); -+ -+ if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT) -+ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.", -+ reg->precision); -+ -+ if (reg->data_type >= VKD3D_DATA_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.", -+ reg->data_type); -+ -+ if (reg->dimension >= VSIR_DIMENSION_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.", -+ reg->dimension); -+ -+ if (reg->idx_count > ARRAY_SIZE(reg->idx)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.", -+ reg->idx_count); -+ -+ for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) -+ { -+ const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; -+ if (reg->idx[i].rel_addr) -+ vsir_validate_src_param(ctx, param); -+ } -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ vsir_validate_temp_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_INPUT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_RASTOUT: -+ vsir_validate_rastout_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_DEPTHOUT: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_MISCTYPE: -+ vsir_validate_misctype_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_LABEL: -+ vsir_validate_label_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_IMMCONST: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_IMMCONST64: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_NULL: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_SAMPLER: -+ vsir_validate_sampler_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_RESOURCE: -+ vsir_validate_resource_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_UAV: -+ vsir_validate_uav_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_INCONTROLPOINT: -+ 
vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_OUTCONTROLPOINT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_DEPTHOUTGE: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_DEPTHOUTLE: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_SSA: -+ vsir_validate_ssa_register(ctx, reg); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static void vsir_validate_dst_param(struct validation_context *ctx, -+ const struct vkd3d_shader_dst_param *dst) -+{ -+ vsir_validate_register(ctx, &dst->reg); -+ -+ if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", -+ dst->write_mask); -+ -+ switch (dst->reg.dimension) -+ { -+ case VSIR_DIMENSION_SCALAR: -+ if (dst->write_mask != VKD3DSP_WRITEMASK_0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", -+ dst->write_mask); -+ break; -+ -+ case VSIR_DIMENSION_VEC4: -+ if (dst->write_mask == 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask."); -+ break; -+ -+ default: -+ if (dst->write_mask != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.", -+ dst->reg.dimension, dst->write_mask); -+ break; -+ } -+ -+ if (dst->modifiers & ~VKD3DSPDM_MASK) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", -+ dst->modifiers); -+ -+ switch (dst->shift) -+ { -+ case 0: -+ case 1: -+ case 2: -+ case 3: - case 13: - case 14: - case 15: - break; - - default: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", -- dst->shift); -+ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", -+ dst->shift); -+ } -+ -+ switch (dst->reg.type) -+ { -+ case VKD3DSPR_SSA: -+ if (dst->reg.idx[0].offset < ctx->program->ssa_count) -+ { -+ struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; -+ -+ if (data->write_mask == 0) -+ { -+ data->write_mask = dst->write_mask; -+ data->first_assigned = ctx->instruction_idx; -+ } -+ else -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, -+ "SSA register is already assigned at instruction %zu.", -+ data->first_assigned); -+ } -+ } -+ break; -+ -+ case VKD3DSPR_IMMCONST: -+ case VKD3DSPR_IMMCONST64: -+ case VKD3DSPR_SAMPLER: -+ case VKD3DSPR_RESOURCE: -+ case VKD3DSPR_INPUT: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid %#x register used as destination parameter.", dst->reg.type); -+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static void vsir_validate_src_param(struct validation_context *ctx, -+ const struct vkd3d_shader_src_param *src) -+{ -+ vsir_validate_register(ctx, &src->reg); -+ -+ if (src->swizzle & ~0x03030303u) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", -+ src->swizzle); -+ -+ if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", -+ src->reg.dimension, src->swizzle); -+ -+ if (src->modifiers >= VKD3DSPSM_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", -+ src->modifiers); -+ -+ switch (src->reg.type) -+ { -+ case VKD3DSPR_SSA: -+ if 
(src->reg.idx[0].offset < ctx->program->ssa_count) -+ { -+ struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; -+ unsigned int i; -+ -+ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -+ data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); -+ } -+ break; -+ -+ case VKD3DSPR_NULL: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid NULL register used as source parameter."); -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL -+ || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid OUTPUT register used as source parameter."); -+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN -+ && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders."); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static void vsir_validate_dst_count(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction, unsigned int count) -+{ -+ if (instruction->dst_count != count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, -+ "Invalid destination count %u for an instruction of type %#x, expected %u.", -+ instruction->dst_count, instruction->opcode, count); -+} -+ -+static void vsir_validate_src_count(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction, unsigned int count) -+{ -+ if (instruction->src_count != count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -+ "Invalid source count %u for an instruction of type %#x, expected %u.", -+ instruction->src_count, instruction->opcode, count); -+} -+ -+static bool 
vsir_validate_src_min_count(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction, unsigned int count) -+{ -+ if (instruction->src_count < count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -+ "Invalid source count %u for an instruction of type %#x, expected at least %u.", -+ instruction->src_count, instruction->opcode, count); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool vsir_validate_src_max_count(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction, unsigned int count) -+{ -+ if (instruction->src_count > count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -+ "Invalid source count %u for an instruction of type %#x, expected at most %u.", -+ instruction->src_count, instruction->opcode, count); -+ return false; -+ } -+ -+ return true; -+} -+ -+enum vsir_signature_type -+{ -+ SIGNATURE_TYPE_INPUT, -+ SIGNATURE_TYPE_OUTPUT, -+ SIGNATURE_TYPE_PATCH_CONSTANT, -+}; -+ -+static const char * const signature_type_names[] = -+{ -+ [SIGNATURE_TYPE_INPUT] = "input", -+ [SIGNATURE_TYPE_OUTPUT] = "output", -+ [SIGNATURE_TYPE_PATCH_CONSTANT] = "patch constant", -+}; -+ -+#define PS_BIT (1u << VKD3D_SHADER_TYPE_PIXEL) -+#define VS_BIT (1u << VKD3D_SHADER_TYPE_VERTEX) -+#define GS_BIT (1u << VKD3D_SHADER_TYPE_GEOMETRY) -+#define HS_BIT (1u << VKD3D_SHADER_TYPE_HULL) -+#define DS_BIT (1u << VKD3D_SHADER_TYPE_DOMAIN) -+#define CS_BIT (1u << VKD3D_SHADER_TYPE_COMPUTE) -+ -+static const struct sysval_validation_data_element -+{ -+ unsigned int input; -+ unsigned int output; -+ unsigned int patch_constant; -+ enum vkd3d_shader_component_type data_type; -+ unsigned int component_count; -+} -+sysval_validation_data[] = -+{ -+ [VKD3D_SHADER_SV_POSITION] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, -+ VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+ [VKD3D_SHADER_SV_CLIP_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | 
VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, -+ VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+ [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, -+ VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+}; -+ -+static void vsir_validate_signature_element(struct validation_context *ctx, -+ const struct shader_signature *signature, enum vsir_signature_type signature_type, -+ unsigned int idx) -+{ -+ enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; -+ const char *signature_type_name = signature_type_names[signature_type]; -+ const struct signature_element *element = &signature->elements[idx]; -+ bool integer_type = false, is_outer = false; -+ unsigned int semantic_index_max = 0; -+ -+ if (element->register_count == 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); -+ -+ if (element->mask == 0 || (element->mask & ~0xf)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); -+ -+ if (!vkd3d_bitmask_is_contiguous(element->mask)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Non-contiguous mask %#x.", -+ idx, signature_type_name, element->mask); 
-+ -+ /* Here we'd likely want to validate that the usage mask is a subset of the -+ * signature mask. Unfortunately the D3DBC parser sometimes violates this. -+ * For example I've seen a shader like this: -+ * ps_3_0 -+ * [...] -+ * dcl_texcoord0 v0 -+ * [...] -+ * texld r2.xyzw, v0.xyzw, s1.xyzw -+ * [...] -+ * -+ * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to -+ * compute the signature mask, but the texld instruction apparently uses all -+ * the components. Of course the last two components are ignored, but -+ * formally they seem to be used. So we end up with a signature element with -+ * mask .xy and usage mask .xyzw. -+ * -+ * The correct fix would probably be to make the D3DBC parser aware of which -+ * components are really used for each instruction, but that would take some -+ * time. */ -+ if (element->used_mask & ~0xf) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid usage mask %#x.", -+ idx, signature_type_name, element->used_mask); -+ -+ switch (element->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: -+ case VKD3D_SHADER_SV_POSITION: -+ case VKD3D_SHADER_SV_CLIP_DISTANCE: -+ case VKD3D_SHADER_SV_CULL_DISTANCE: -+ case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: -+ case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ case VKD3D_SHADER_SV_INSTANCE_ID: -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ case VKD3D_SHADER_SV_TARGET: -+ case VKD3D_SHADER_SV_DEPTH: -+ case VKD3D_SHADER_SV_COVERAGE: -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: -+ case VKD3D_SHADER_SV_STENCIL_REF: -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; -+ semantic_index_max = 4; -+ is_outer = true; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: -+ expected_tess_domain = 
VKD3D_TESSELLATOR_DOMAIN_QUAD; -+ semantic_index_max = 2; -+ is_outer = false; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; -+ semantic_index_max = 3; -+ is_outer = true; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; -+ semantic_index_max = 1; -+ is_outer = false; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; -+ semantic_index_max = 2; -+ is_outer = true; -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid system value semantic %#x.", -+ idx, signature_type_name, element->sysval_semantic); -+ break; - } - -- switch (dst->reg.type) -+ if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) - { -- case VKD3DSPR_SSA: -- if (dst->reg.idx[0].offset < ctx->program->ssa_count) -+ if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: System value semantic %#x is only valid " -+ "in the patch constant signature.", -+ idx, signature_type_name, element->sysval_semantic); -+ -+ if (ctx->program->tess_domain != expected_tess_domain) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", -+ idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); -+ -+ if (element->semantic_index >= semantic_index_max) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", -+ idx, signature_type_name, element->semantic_index, element->sysval_semantic); -+ } -+ else -+ { -+ unsigned int *idx_pos = &(is_outer ? 
ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; -+ -+ if (*idx_pos != ~0u) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", -+ idx, signature_type_name, element->semantic_index, element->sysval_semantic); -+ else -+ *idx_pos = idx; -+ } -+ } -+ -+ if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) -+ { -+ const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; -+ -+ if (data->input || data->output || data->patch_constant) -+ { -+ unsigned int mask; -+ -+ switch (signature_type) - { -- struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; -+ case SIGNATURE_TYPE_INPUT: -+ mask = data->input; -+ break; - -- if (data->write_mask == 0) -- { -- data->write_mask = dst->write_mask; -- data->first_assigned = ctx->instruction_idx; -- } -- else -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, -- "SSA register is already assigned at instruction %zu.", -- data->first_assigned); -- } -+ case SIGNATURE_TYPE_OUTPUT: -+ mask = data->output; -+ break; -+ -+ case SIGNATURE_TYPE_PATCH_CONSTANT: -+ mask = data->patch_constant; -+ break; -+ -+ default: -+ vkd3d_unreachable(); - } -- break; - -- case VKD3DSPR_IMMCONST: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid IMMCONST register used as destination parameter."); -+ if (!(mask & (1u << ctx->program->shader_version.type))) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid system value semantic %#x.", -+ idx, signature_type_name, element->sysval_semantic); -+ } -+ -+ if (data->component_count != 0) -+ { -+ if (element->component_type != data->data_type) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid data type %#x for system value semantic %#x.", 
-+ idx, signature_type_name, element->component_type, element->sysval_semantic); -+ -+ if (vsir_write_mask_component_count(element->mask) > data->component_count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid mask %#x for system value semantic %#x.", -+ idx, signature_type_name, element->mask, element->sysval_semantic); -+ } -+ } -+ -+ switch (element->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_INT: -+ case VKD3D_SHADER_COMPONENT_UINT: -+ integer_type = true; - break; - -- case VKD3DSPR_IMMCONST64: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid IMMCONST64 register used as destination parameter."); -+ case VKD3D_SHADER_COMPONENT_FLOAT: - break; - - default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid component type %#x.", -+ idx, signature_type_name, element->component_type); - break; - } -+ -+ if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid minimum precision %#x.", -+ idx, signature_type_name, element->min_precision); -+ -+ if (element->interpolation_mode >= VKD3DSIM_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid interpolation mode %#x.", -+ idx, signature_type_name, element->interpolation_mode); -+ -+ if (integer_type && element->interpolation_mode != VKD3DSIM_NONE -+ && element->interpolation_mode != VKD3DSIM_CONSTANT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", -+ idx, signature_type_name, element->interpolation_mode); - } - --static void vsir_validate_src_param(struct validation_context *ctx, -- const struct vkd3d_shader_src_param *src) -+static const unsigned int allowed_signature_phases[] 
= - { -- vsir_validate_register(ctx, &src->reg); -+ [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, -+ [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, -+ [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, -+}; - -- if (src->swizzle & ~0x03030303u) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", -- src->swizzle); -+static void vsir_validate_signature(struct validation_context *ctx, -+ const struct shader_signature *signature, enum vsir_signature_type signature_type) -+{ -+ unsigned int i; - -- if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", -- src->reg.dimension, src->swizzle); -+ if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] -+ & (1u << ctx->program->shader_version.type))) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Unexpected %s signature.", signature_type_names[signature_type]); - -- if (src->modifiers >= VKD3DSPSM_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", -- src->modifiers); -+ for (i = 0; i < signature->element_count; ++i) -+ vsir_validate_signature_element(ctx, signature, signature_type, i); -+} - -- switch (src->reg.type) -+static const char *name_from_cf_type(enum vsir_control_flow_type type) -+{ -+ switch (type) - { -- case VKD3DSPR_SSA: -- if (src->reg.idx[0].offset < ctx->program->ssa_count) -- { -- struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; -- unsigned int i; -+ case VSIR_CF_STRUCTURED: -+ return "structured"; -+ case VSIR_CF_BLOCKS: -+ return "block-based"; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void vsir_validate_cf_type(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction, enum vsir_control_flow_type 
expected_type) -+{ -+ if (ctx->program->cf_type != expected_type) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", -+ instruction->opcode, name_from_cf_type(ctx->program->cf_type)); -+} -+ -+static void vsir_validator_push_block(struct validation_context *ctx, enum vkd3d_shader_opcode opcode) -+{ -+ if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -+ { -+ ctx->status = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ ctx->blocks[ctx->depth++] = opcode; -+} -+ -+static void vsir_validate_hull_shader_phase(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -+ "Phase instruction %#x is only valid in a hull shader.", -+ instruction->opcode); -+ if (ctx->depth != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Phase instruction %#x must appear to top level.", -+ instruction->opcode); -+ ctx->phase = instruction->opcode; -+ ctx->dcl_temps_found = false; -+} -+ -+static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ size_t i; -+ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); -+ vsir_validate_dst_count(ctx, instruction, 0); -+ -+ if (!vsir_validate_src_min_count(ctx, instruction, 1)) -+ return; -+ -+ if (vsir_register_is_label(&instruction->src[0].reg)) -+ { -+ /* Unconditional branch: parameters are jump label, -+ * optional merge label, optional continue label. 
*/ -+ vsir_validate_src_max_count(ctx, instruction, 3); -+ -+ for (i = 0; i < instruction->src_count; ++i) -+ { -+ if (!vsir_register_is_label(&instruction->src[i].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", -+ instruction->src[i].reg.type); -+ } -+ } -+ else -+ { -+ /* Conditional branch: parameters are condition, true -+ * jump label, false jump label, optional merge label, -+ * optional continue label. */ -+ vsir_validate_src_min_count(ctx, instruction, 3); -+ vsir_validate_src_max_count(ctx, instruction, 5); -+ -+ for (i = 1; i < instruction->src_count; ++i) -+ { -+ if (!vsir_register_is_label(&instruction->src[i].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", -+ instruction->src[i].reg.type); -+ } -+ } -+ -+ ctx->inside_block = false; -+} -+ -+static void vsir_validate_dcl_gs_instances(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (!instruction->declaration.count || instruction->declaration.count > 32) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", -+ instruction->declaration.count); -+} -+ -+static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ /* Exclude non-finite values. 
*/ -+ if (!(instruction->declaration.max_tessellation_factor >= 1.0f -+ && instruction->declaration.max_tessellation_factor <= 64.0f)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Max tessellation factor %f is invalid.", -+ instruction->declaration.max_tessellation_factor); -+} -+ -+static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -+ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", -+ instruction->declaration.primitive_type.type); -+} -+ -+static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (!instruction->declaration.count || instruction->declaration.count > 32) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Output control point count %u is invalid.", -+ instruction->declaration.count); -+} -+ -+static void vsir_validate_dcl_output_topology(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -+ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", -+ instruction->declaration.primitive_type.type); -+} -+ -+static void vsir_validate_dcl_temps(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (ctx->dcl_temps_found) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, -+ "Duplicate DCL_TEMPS instruction."); -+ if (instruction->declaration.count > ctx->program->temp_count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, -+ "Invalid DCL_TEMPS 
count %u, expected at most %u.", -+ instruction->declaration.count, ctx->program->temp_count); -+ ctx->dcl_temps_found = true; -+} -+ -+static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -+ || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); -+ -+ if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", -+ instruction->declaration.tessellator_domain, ctx->program->tess_domain); -+} -+ -+static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (!instruction->declaration.tessellator_output_primitive -+ || instruction->declaration.tessellator_output_primitive -+ > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator output primitive %#x is invalid.", -+ instruction->declaration.tessellator_output_primitive); -+} -+ -+static void vsir_validate_dcl_tessellator_partitioning(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (!instruction->declaration.tessellator_partitioning -+ || instruction->declaration.tessellator_partitioning -+ > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator partitioning %#x is invalid.", -+ instruction->declaration.tessellator_partitioning); -+} -+ -+static void vsir_validate_dcl_vertices_out(struct 
validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ if (instruction->declaration.count > 1024) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", -+ instruction->declaration.count); -+} -+ -+static void vsir_validate_else(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "ELSE instruction doesn't terminate IF block."); -+ else -+ ctx->blocks[ctx->depth - 1] = VKD3DSIH_ELSE; -+} -+ -+static void vsir_validate_endif(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF -+ && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "ENDIF instruction doesn't terminate IF/ELSE block."); -+ else -+ --ctx->depth; -+} -+ -+static void vsir_validate_endloop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "ENDLOOP instruction doesn't terminate LOOP block."); -+ else -+ --ctx->depth; -+} -+ -+static void vsir_validate_endrep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "ENDREP instruction doesn't terminate REP block."); -+ else -+ 
--ctx->depth; -+} -+ -+static void vsir_validate_endswitch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "ENDSWITCH instruction doesn't terminate SWITCH block."); -+ else -+ --ctx->depth; -+} -+ -+static void vsir_validate_if(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ vsir_validator_push_block(ctx, VKD3DSIH_IF); -+} -+ -+static void vsir_validate_ifc(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ vsir_validator_push_block(ctx, VKD3DSIH_IF); -+} -+ -+static void vsir_validate_label(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); -+ if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register of type %#x in a LABEL instruction, expected LABEL.", -+ instruction->src[0].reg.type); -+ -+ if (ctx->inside_block) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid LABEL instruction inside a block."); -+ ctx->inside_block = true; -+} -+ -+static void vsir_validate_loop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ vsir_validate_src_count(ctx, instruction, ctx->program->shader_version.major <= 3 ? 
2 : 0); -+ vsir_validator_push_block(ctx, VKD3DSIH_LOOP); -+} -+ -+static void vsir_validate_nop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+} -+ -+static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ -+ unsigned int i, incoming_count; -+ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); -+ -+ vsir_validate_src_min_count(ctx, instruction, 2); -+ -+ if (instruction->src_count % 2 != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -+ "Invalid source count %u for a PHI instruction, it must be an even number.", -+ instruction->src_count); -+ incoming_count = instruction->src_count / 2; -+ -+ for (i = 0; i < incoming_count; ++i) -+ { -+ unsigned int value_idx = 2 * i; -+ unsigned int label_idx = 2 * i + 1; -+ -+ if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) -+ && !register_is_ssa(&instruction->src[value_idx].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid value register for incoming %u of type %#x in PHI instruction, " -+ "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -+ -+ if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.", -+ instruction->src[value_idx].reg.dimension, i); -+ -+ if (!vsir_register_is_label(&instruction->src[label_idx].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid label register for case %u of type %#x in PHI instruction, " -+ "expected LABEL.", i, instruction->src[value_idx].reg.type); -+ } -+ -+ if (instruction->dst_count < 1) -+ return; -+ -+ if (!register_is_ssa(&instruction->dst[0].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid destination of type %#x 
in PHI instruction, expected SSA.", -+ instruction->dst[0].reg.type); -+ -+ if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid destination dimension %#x in PHI instruction, expected scalar.", -+ instruction->dst[0].reg.dimension); - -- for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -- data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); -- } -- break; -+ if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, -+ "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", -+ instruction->dst[0].modifiers); - -- default: -- break; -- } -+ if (instruction->dst[0].shift != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, -+ "Invalid shift %#x for the destination of a PHI instruction, expected none.", -+ instruction->dst[0].shift); - } - --static void vsir_validate_dst_count(struct validation_context *ctx, -- const struct vkd3d_shader_instruction *instruction, unsigned int count) -+static void vsir_validate_rep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) - { -- if (instruction->dst_count != count) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, -- "Invalid destination count %u for an instruction of type %#x, expected %u.", -- instruction->dst_count, instruction->opcode, count); -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ vsir_validator_push_block(ctx, VKD3DSIH_REP); - } - --static void vsir_validate_src_count(struct validation_context *ctx, -- const struct vkd3d_shader_instruction *instruction, unsigned int count) -+static void vsir_validate_ret(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) - { -- if (instruction->src_count != count) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -- "Invalid source count %u for an instruction 
of type %#x, expected %u.", -- instruction->src_count, instruction->opcode, count); -+ ctx->inside_block = false; - } - --static bool vsir_validate_src_min_count(struct validation_context *ctx, -- const struct vkd3d_shader_instruction *instruction, unsigned int count) -+static void vsir_validate_switch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) - { -- if (instruction->src_count < count) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -- "Invalid source count %u for an instruction of type %#x, expected at least %u.", -- instruction->src_count, instruction->opcode, count); -- return false; -- } -- -- return true; -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); -+ vsir_validator_push_block(ctx, VKD3DSIH_SWITCH); - } - --static bool vsir_validate_src_max_count(struct validation_context *ctx, -- const struct vkd3d_shader_instruction *instruction, unsigned int count) -+static void vsir_validate_switch_monolithic(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) - { -- if (instruction->src_count > count) -- { -+ unsigned int i, case_count; -+ -+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); -+ -+ /* Parameters are source, default label, merge label and -+ * then pairs of constant value and case label. 
*/ -+ -+ if (!vsir_validate_src_min_count(ctx, instruction, 3)) -+ return; -+ -+ if (instruction->src_count % 2 != 1) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -- "Invalid source count %u for an instruction of type %#x, expected at most %u.", -- instruction->src_count, instruction->opcode, count); -- return false; -- } -+ "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", -+ instruction->src_count); - -- return true; --} -+ if (!vsir_register_is_label(&instruction->src[1].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", -+ instruction->src[1].reg.type); - --static const char *name_from_cf_type(enum cf_type type) --{ -- switch (type) -+ if (!vsir_register_is_label(&instruction->src[2].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", -+ instruction->src[2].reg.type); -+ -+ case_count = (instruction->src_count - 3) / 2; -+ -+ for (i = 0; i < case_count; ++i) - { -- case CF_TYPE_STRUCTURED: -- return "structured"; -- case CF_TYPE_BLOCKS: -- return "block-based"; -- default: -- vkd3d_unreachable(); -+ unsigned int value_idx = 3 + 2 * i; -+ unsigned int label_idx = 3 + 2 * i + 1; -+ -+ if (!register_is_constant(&instruction->src[value_idx].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid value register for case %u of type %#x in monolithic SWITCH instruction, " -+ "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -+ -+ if (!vsir_register_is_label(&instruction->src[label_idx].reg)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid label register for case %u of type %#x in monolithic SWITCH instruction, " -+ "expected LABEL.", i, 
instruction->src[value_idx].reg.type); - } -+ -+ ctx->inside_block = false; - } - --static void vsir_validate_cf_type(struct validation_context *ctx, -- const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) -+struct vsir_validator_instruction_desc - { -- VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); -- VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); -- if (ctx->cf_type != expected_type) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", -- instruction->opcode, name_from_cf_type(ctx->cf_type)); --} -+ unsigned int dst_param_count; -+ unsigned int src_param_count; -+ void (*validate)(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction); -+}; -+ -+static const struct vsir_validator_instruction_desc vsir_validator_instructions[] = -+{ -+ [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, -+ [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, -+ [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, -+ [VKD3DSIH_HS_FORK_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, -+ [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, -+ [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, -+ [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, -+ [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, -+ [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, -+ [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, -+ [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, -+ [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, -+ [VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_tessellator_output_primitive}, -+ [VKD3DSIH_DCL_TESSELLATOR_PARTITIONING] = {0, 0, vsir_validate_dcl_tessellator_partitioning}, -+ [VKD3DSIH_DCL_VERTICES_OUT] = {0, 
0, vsir_validate_dcl_vertices_out}, -+ [VKD3DSIH_ELSE] = {0, 0, vsir_validate_else}, -+ [VKD3DSIH_ENDIF] = {0, 0, vsir_validate_endif}, -+ [VKD3DSIH_ENDLOOP] = {0, 0, vsir_validate_endloop}, -+ [VKD3DSIH_ENDREP] = {0, 0, vsir_validate_endrep}, -+ [VKD3DSIH_ENDSWITCH] = {0, 0, vsir_validate_endswitch}, -+ [VKD3DSIH_IF] = {0, 1, vsir_validate_if}, -+ [VKD3DSIH_IFC] = {0, 2, vsir_validate_ifc}, -+ [VKD3DSIH_LABEL] = {0, 1, vsir_validate_label}, -+ [VKD3DSIH_LOOP] = {0, ~0u, vsir_validate_loop}, -+ [VKD3DSIH_NOP] = {0, 0, vsir_validate_nop}, -+ [VKD3DSIH_PHI] = {1, ~0u, vsir_validate_phi}, -+ [VKD3DSIH_REP] = {0, 1, vsir_validate_rep}, -+ [VKD3DSIH_RET] = {0, 0, vsir_validate_ret}, -+ [VKD3DSIH_SWITCH] = {0, 1, vsir_validate_switch}, -+ [VKD3DSIH_SWITCH_MONOLITHIC] = {0, ~0u, vsir_validate_switch_monolithic}, -+}; - - static void vsir_validate_instruction(struct validation_context *ctx) - { -@@ -6148,136 +8176,40 @@ static void vsir_validate_instruction(struct validation_context *ctx) - instruction->opcode); - } - -- switch (instruction->opcode) -+ if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) - { -- case VKD3DSIH_HS_DECLS: -- case VKD3DSIH_HS_CONTROL_POINT_PHASE: -- case VKD3DSIH_HS_FORK_PHASE: -- case VKD3DSIH_HS_JOIN_PHASE: -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (version->type != VKD3D_SHADER_TYPE_HULL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -- "Phase instruction %#x is only valid in a hull shader.", -- instruction->opcode); -- if (ctx->depth != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -- "Phase instruction %#x must appear to top level.", -- instruction->opcode); -- ctx->phase = instruction->opcode; -- ctx->dcl_temps_found = false; -- return; -- -- case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -- /* Exclude non-finite values. 
*/ -- if (!(instruction->declaration.max_tessellation_factor >= 1.0f -- && instruction->declaration.max_tessellation_factor <= 64.0f)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", -- instruction->declaration.max_tessellation_factor); -- return; -- -- case VKD3DSIH_DCL_INPUT_PRIMITIVE: -- if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -- || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", -- instruction->declaration.primitive_type.type); -- return; -- -- case VKD3DSIH_DCL_VERTICES_OUT: -- if (instruction->declaration.count > 1024) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", -- instruction->declaration.count); -- return; -- -- case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: -- if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -- || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", -- instruction->declaration.primitive_type.type); -- return; -- -- case VKD3DSIH_DCL_GS_INSTANCES: -- if (!instruction->declaration.count || instruction->declaration.count > 32) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", -- instruction->declaration.count); -- return; -- -- case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: -- if (!instruction->declaration.count || instruction->declaration.count > 32) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", -- instruction->declaration.count); -- return; -- -- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -- if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -- || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) 
-- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -- "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); -- return; -- -- case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -- if (!instruction->declaration.tessellator_output_primitive -- || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -- "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); -- return; -- -- case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -- if (!instruction->declaration.tessellator_partitioning -- || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -- "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); -- return; -- -- default: -- break; -- } -- -- /* Only DCL instructions may occur outside hull shader phases. */ -- if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL -- && ctx->phase == VKD3DSIH_INVALID) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -- "Instruction %#x appear before any phase instruction in a hull shader.", -- instruction->opcode); -+ switch (instruction->opcode) -+ { -+ case VKD3DSIH_NOP: -+ case VKD3DSIH_HS_DECLS: -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ break; - -- /* We support two different control flow types in shaders: -- * block-based, like DXIL and SPIR-V, and structured, like D3DBC -- * and TPF. The shader is detected as block-based when its first -- * instruction, except for DCL_* and phases, is a LABEL. Currently -- * we mandate that each shader is either purely block-based or -- * purely structured. 
In principle we could allow structured -- * constructs in a block, provided they are confined in a single -- * block, but need for that hasn't arisen yet, so we don't. */ -- if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) -- { -- if (instruction->opcode == VKD3DSIH_LABEL) -- ctx->cf_type = CF_TYPE_BLOCKS; -- else -- ctx->cf_type = CF_TYPE_STRUCTURED; -+ default: -+ if (!vsir_instruction_is_dcl(instruction)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -+ "Instruction %#x appear before any phase instruction in a hull shader.", -+ instruction->opcode); -+ break; -+ } - } - -- if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) -+ if (ctx->program->cf_type == VSIR_CF_BLOCKS && !ctx->inside_block) - { - switch (instruction->opcode) - { -+ case VKD3DSIH_NOP: - case VKD3DSIH_LABEL: -- if (ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block."); -- ctx->inside_block = true; -- break; -- -- case VKD3DSIH_RET: -- case VKD3DSIH_BRANCH: -- case VKD3DSIH_SWITCH_MONOLITHIC: -- if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -- "Invalid instruction %#x outside any block.", -- instruction->opcode); -- ctx->inside_block = false; -+ case VKD3DSIH_HS_DECLS: -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: - break; - - default: -- if (!ctx->inside_block) -+ if (!vsir_instruction_is_dcl(instruction)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, - "Invalid instruction %#x outside any block.", - instruction->opcode); -@@ -6285,271 +8217,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) - } - } - -- switch (instruction->opcode) -+ if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions)) - { -- case VKD3DSIH_DCL_TEMPS: -- vsir_validate_dst_count(ctx, instruction, 0); -- 
vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->dcl_temps_found) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction."); -- if (instruction->declaration.count > ctx->program->temp_count) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, -- "Invalid DCL_TEMPS count %u, expected at most %u.", -- instruction->declaration.count, ctx->program->temp_count); -- ctx->dcl_temps_found = true; -- break; -- -- case VKD3DSIH_IF: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; -- -- case VKD3DSIH_IFC: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 2); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = VKD3DSIH_IF; -- break; -- -- case VKD3DSIH_ELSE: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); -- else -- ctx->blocks[ctx->depth - 1] = instruction->opcode; -- break; -- -- case VKD3DSIH_ENDIF: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) -- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block."); -- else -- --ctx->depth; -- break; -- -- case VKD3DSIH_LOOP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; -- -- case VKD3DSIH_ENDLOOP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block."); -- else -- --ctx->depth; -- break; -- -- case VKD3DSIH_REP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; -- -- case VKD3DSIH_ENDREP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block."); -- else -- --ctx->depth; -- break; -- -- case VKD3DSIH_SWITCH: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, 
&ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; -- -- case VKD3DSIH_ENDSWITCH: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block."); -- else -- --ctx->depth; -- break; -- -- case VKD3DSIH_RET: -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- break; -- -- case VKD3DSIH_LABEL: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid register of type %#x in a LABEL instruction, expected LABEL.", -- instruction->src[0].reg.type); -- break; -- -- case VKD3DSIH_BRANCH: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 0); -- if (!vsir_validate_src_min_count(ctx, instruction, 1)) -- break; -- if (vsir_register_is_label(&instruction->src[0].reg)) -- { -- /* Unconditional branch: parameters are jump label, -- * optional merge label, optional continue label. 
*/ -- vsir_validate_src_max_count(ctx, instruction, 3); -- -- for (i = 0; i < instruction->src_count; ++i) -- { -- if (!vsir_register_is_label(&instruction->src[i].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", -- instruction->src[i].reg.type); -- } -- } -- else -- { -- /* Conditional branch: parameters are condition, true -- * jump label, false jump label, optional merge label, -- * optional continue label. */ -- vsir_validate_src_min_count(ctx, instruction, 3); -- vsir_validate_src_max_count(ctx, instruction, 5); -- -- for (i = 1; i < instruction->src_count; ++i) -- { -- if (!vsir_register_is_label(&instruction->src[i].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", -- instruction->src[i].reg.type); -- } -- } -- break; -- -- case VKD3DSIH_SWITCH_MONOLITHIC: -- { -- unsigned int case_count; -+ const struct vsir_validator_instruction_desc *desc; - -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 0); -- /* Parameters are source, default label, merge label and -- * then pairs of constant value and case label. 
*/ -- if (!vsir_validate_src_min_count(ctx, instruction, 3)) -- break; -- if (instruction->src_count % 2 != 1) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -- "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", -- instruction->src_count); -- -- if (!vsir_register_is_label(&instruction->src[1].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", -- instruction->src[1].reg.type); -- -- if (!vsir_register_is_label(&instruction->src[2].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", -- instruction->src[2].reg.type); -- -- case_count = (instruction->src_count - 3) / 2; -- -- for (i = 0; i < case_count; ++i) -- { -- unsigned int value_idx = 3 + 2 * i; -- unsigned int label_idx = 3 + 2 * i + 1; -- -- if (!register_is_constant(&instruction->src[value_idx].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, " -- "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -- -- if (!vsir_register_is_label(&instruction->src[label_idx].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, " -- "expected LABEL.", i, instruction->src[value_idx].reg.type); -- } -- break; -- } -+ desc = &vsir_validator_instructions[instruction->opcode]; - -- case VKD3DSIH_PHI: -+ if (desc->validate) - { -- unsigned int incoming_count; -- -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 1); -- vsir_validate_src_min_count(ctx, instruction, 2); -- if (instruction->src_count % 2 != 0) -- 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -- "Invalid source count %u for a PHI instruction, it must be an even number.", -- instruction->src_count); -- incoming_count = instruction->src_count / 2; -- -- if (!register_is_ssa(&instruction->dst[0].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid destination of type %#x in PHI instruction, expected SSA.", -- instruction->dst[0].reg.type); -- -- if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -- "Invalid destination dimension %#x in PHI instruction, expected scalar.", -- instruction->dst[0].reg.dimension); -- -- if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, -- "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", -- instruction->dst[0].modifiers); -- -- if (instruction->dst[0].shift != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, -- "Invalid shift %#x for the destination of a PHI instruction, expected none.", -- instruction->dst[0].shift); -- -- for (i = 0; i < incoming_count; ++i) -- { -- unsigned int value_idx = 2 * i; -- unsigned int label_idx = 2 * i + 1; -- -- if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) -- && !register_is_ssa(&instruction->src[value_idx].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid value register for incoming %zu of type %#x in PHI instruction, " -- "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -- -- if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -- "Invalid value dimension %#x for incoming %zu in PHI instruction, expected scalar.", -- instruction->src[value_idx].reg.dimension, i); -- -- if 
(!vsir_register_is_label(&instruction->src[label_idx].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid label register for case %zu of type %#x in PHI instruction, " -- "expected LABEL.", i, instruction->src[value_idx].reg.type); -- } -- break; -+ if (desc->dst_param_count != ~0u) -+ vsir_validate_dst_count(ctx, instruction, desc->dst_param_count); -+ if (desc->src_param_count != ~0u) -+ vsir_validate_src_count(ctx, instruction, desc->src_param_count); -+ desc->validate(ctx, instruction); - } -- -- default: -- break; - } - } - -@@ -6563,19 +8244,84 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - .null_location = {.source_name = source_name}, - .status = VKD3D_OK, - .phase = VKD3DSIH_INVALID, -+ .invalid_instruction_idx = true, -+ .outer_tess_idxs[0] = ~0u, -+ .outer_tess_idxs[1] = ~0u, -+ .outer_tess_idxs[2] = ~0u, -+ .outer_tess_idxs[3] = ~0u, -+ .inner_tess_idxs[0] = ~0u, -+ .inner_tess_idxs[1] = ~0u, - }; - unsigned int i; - - if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) - return VKD3D_OK; - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -+ || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Invalid tessellation domain %#x.", program->tess_domain); -+ break; -+ -+ default: -+ if (program->patch_constant_signature.element_count != 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Patch constant signature is only valid for hull and domain shaders."); -+ -+ if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Invalid tessellation domain %#x.", program->tess_domain); -+ } -+ -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ 
break; -+ -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ if (program->input_control_point_count == 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid zero input control point count."); -+ break; -+ -+ default: -+ if (program->input_control_point_count != 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid input control point count %u.", -+ program->input_control_point_count); -+ } -+ -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ break; -+ -+ default: -+ if (program->output_control_point_count != 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid output control point count %u.", -+ program->output_control_point_count); -+ } -+ -+ vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); -+ vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); -+ vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); -+ - if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) - goto fail; - - if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) - goto fail; - -- for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) -+ ctx.invalid_instruction_idx = false; -+ -+ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count -+ && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx) - vsir_validate_instruction(&ctx); - - ctx.invalid_instruction_idx = true; -@@ -6610,74 +8356,107 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+#define vsir_transform(ctx, step) vsir_transform_(ctx, #step, step) -+static void 
vsir_transform_( -+ struct vsir_transformation_context *ctx, const char *step_name, -+ enum vkd3d_result (*step)(struct vsir_program *program, struct vsir_transformation_context *ctx)) - { -- enum vkd3d_result result = VKD3D_OK; -+ if (ctx->result < 0) -+ return; - -- if ((result = vsir_program_lower_instructions(program, message_context)) < 0) -- return result; -+ if ((ctx->result = step(ctx->program, ctx)) < 0) -+ { -+ WARN("Transformation \"%s\" failed with result %d.\n", step_name, ctx->result); -+ return; -+ } - -- if (program->shader_version.major >= 6) -+ if ((ctx->result = vsir_program_validate(ctx->program, ctx->config_flags, -+ ctx->compile_info->source_name, ctx->message_context)) < 0) -+ { -+ WARN("Validation failed with result %d after transformation \"%s\".\n", ctx->result, step_name); -+ return; -+ } -+} -+ -+/* Transformations which should happen at parse time, i.e. before scan -+ * information is returned to the user. -+ * -+ * In particular, some passes need to modify the signature, and -+ * vkd3d_shader_scan() should report the modified signature for the given -+ * target. */ -+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vsir_transformation_context ctx = - { -- if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) -- return result; -+ .result = VKD3D_OK, -+ .program = program, -+ .config_flags = config_flags, -+ .compile_info = compile_info, -+ .message_context = message_context, -+ }; - -- if ((result = lower_switch_to_if_ladder(program)) < 0) -- return result; -+ /* For vsir_program_ensure_diffuse(). 
*/ -+ if (program->shader_version.major <= 2) -+ vsir_transform(&ctx, vsir_program_add_diffuse_output); - -- if ((result = vsir_program_structurize(program, message_context)) < 0) -- return result; -+ return ctx.result; -+} -+ -+enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vsir_transformation_context ctx = -+ { -+ .result = VKD3D_OK, -+ .program = program, -+ .config_flags = config_flags, -+ .compile_info = compile_info, -+ .message_context = message_context, -+ }; - -- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -- return result; -+ vsir_transform(&ctx, vsir_program_lower_instructions); - -- if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) -- return result; -+ if (program->shader_version.major >= 6) -+ { -+ vsir_transform(&ctx, vsir_program_materialise_phi_ssas_to_temps); -+ vsir_transform(&ctx, vsir_program_lower_switch_to_selection_ladder); -+ vsir_transform(&ctx, vsir_program_structurize); -+ vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs); -+ vsir_transform(&ctx, vsir_program_materialize_undominated_ssas_to_temps); - } - else - { -- if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -- { -- if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) -- return result; -- } -- -- if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -- { -- if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) -- return result; -+ vsir_transform(&ctx, vsir_program_ensure_ret); - -- if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, -- &program->input_signature)) < 0) -- return result; -- } -- -- if ((result = vsir_program_normalise_io_registers(program, 
message_context)) < 0) -- return result; -+ if (program->shader_version.major <= 2) -+ vsir_transform(&ctx, vsir_program_ensure_diffuse); - -- if ((result = instruction_array_normalise_flat_constants(program)) < 0) -- return result; -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ vsir_transform(&ctx, vsir_program_remap_output_signature); - -- remove_dead_code(program); -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -+ vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); - -- if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) -- return result; -+ vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); -+ vsir_transform(&ctx, vsir_program_normalise_io_registers); -+ vsir_transform(&ctx, vsir_program_normalise_flat_constants); -+ vsir_transform(&ctx, vsir_program_remove_dead_code); - - if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL -- && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -- return result; -+ && compile_info->target_type != VKD3D_SHADER_TARGET_MSL) -+ vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs); - } - -- if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) -- return result; -+ vsir_transform(&ctx, vsir_program_apply_flat_interpolation); -+ vsir_transform(&ctx, vsir_program_insert_alpha_test); -+ vsir_transform(&ctx, vsir_program_insert_clip_planes); -+ vsir_transform(&ctx, vsir_program_insert_point_size); -+ vsir_transform(&ctx, vsir_program_insert_point_size_clamp); -+ vsir_transform(&ctx, vsir_program_insert_point_coord); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -- -- if ((result = vsir_program_validate(program, config_flags, -- compile_info->source_name, message_context)) < 0) -- return result; -+ vsir_program_trace(program); - -- return result; -+ return ctx.result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c 
b/libs/vkd3d/libs/vkd3d-shader/msl.c -new file mode 100644 -index 00000000000..df3edeaa4e6 ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -0,0 +1,898 @@ -+/* -+ * Copyright 2024 Feifan He for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_shader_private.h" -+ -+struct msl_src -+{ -+ struct vkd3d_string_buffer *str; -+}; -+ -+struct msl_dst -+{ -+ const struct vkd3d_shader_dst_param *vsir; -+ struct vkd3d_string_buffer *register_name; -+ struct vkd3d_string_buffer *mask; -+}; -+ -+struct msl_generator -+{ -+ struct vsir_program *program; -+ struct vkd3d_string_buffer_cache string_buffers; -+ struct vkd3d_string_buffer *buffer; -+ struct vkd3d_shader_location location; -+ struct vkd3d_shader_message_context *message_context; -+ unsigned int indent; -+ const char *prefix; -+ bool failed; -+ -+ const struct vkd3d_shader_interface_info *interface_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; -+}; -+ -+static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, -+ enum vkd3d_shader_error error, const char *fmt, ...) 
-+{ -+ va_list args; -+ -+ va_start(args, fmt); -+ vkd3d_shader_verror(gen->message_context, &gen->location, error, fmt, args); -+ va_end(args); -+ gen->failed = true; -+} -+ -+static const char *msl_get_prefix(enum vkd3d_shader_type type) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ return "vs"; -+ case VKD3D_SHADER_TYPE_HULL: -+ return "hs"; -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ return "ds"; -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ return "gs"; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ return "ps"; -+ case VKD3D_SHADER_TYPE_COMPUTE: -+ return "cs"; -+ default: -+ return NULL; -+ } -+} -+ -+static void msl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) -+{ -+ vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); -+} -+ -+static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer, -+ struct msl_generator *gen, enum vkd3d_data_type data_type) -+{ -+ vkd3d_string_buffer_printf(buffer, "."); -+ switch (data_type) -+ { -+ case VKD3D_DATA_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "f"); -+ break; -+ case VKD3D_DATA_INT: -+ vkd3d_string_buffer_printf(buffer, "i"); -+ break; -+ case VKD3D_DATA_UINT: -+ vkd3d_string_buffer_printf(buffer, "u"); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register datatype %#x.", data_type); -+ vkd3d_string_buffer_printf(buffer, "<unrecognised register datatype %#x>", data_type); -+ break; -+ } -+} -+ -+static void msl_print_register_name(struct vkd3d_string_buffer *buffer, -+ struct msl_generator *gen, const struct vkd3d_shader_register *reg) -+{ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); -+ msl_print_register_datatype(buffer, gen, reg->data_type); -+ break; -+ -+ case VKD3DSPR_INPUT: -+ if (reg->idx_count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled input register index 
count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled input register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "v[%u]", reg->idx[0].offset); -+ msl_print_register_datatype(buffer, gen, reg->data_type); -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ if (reg->idx_count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled output register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "o[%u]", reg->idx[0].offset); -+ msl_print_register_datatype(buffer, gen, reg->data_type); -+ break; -+ -+ case VKD3DSPR_CONSTBUFFER: -+ if (reg->idx_count != 3) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled constant buffer register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "descriptors.cb_%u[%u]", reg->idx[0].offset, reg->idx[2].offset); -+ msl_print_register_datatype(buffer, gen, 
reg->data_type); -+ break; -+ -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register type %#x.", reg->type); -+ vkd3d_string_buffer_printf(buffer, "<unrecognised register %#x>", reg->type); -+ break; -+ } -+} -+ -+static void msl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask) -+{ -+ const char swizzle_chars[] = "xyzw"; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "."); -+ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -+ { -+ if (mask & (VKD3DSP_WRITEMASK_0 << i)) -+ vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); -+ } -+} -+ -+static void msl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) -+{ -+ vkd3d_string_buffer_printf(buffer, "."); -+ if (write_mask & VKD3DSP_WRITEMASK_0) -+ vkd3d_string_buffer_printf(buffer, "x"); -+ if (write_mask & VKD3DSP_WRITEMASK_1) -+ vkd3d_string_buffer_printf(buffer, "y"); -+ if (write_mask & VKD3DSP_WRITEMASK_2) -+ vkd3d_string_buffer_printf(buffer, "z"); -+ if (write_mask & VKD3DSP_WRITEMASK_3) -+ vkd3d_string_buffer_printf(buffer, "w"); -+} -+ -+static void msl_src_cleanup(struct msl_src *src, struct vkd3d_string_buffer_cache *cache) -+{ -+ vkd3d_string_buffer_release(cache, src->str); -+} -+ -+static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) -+{ -+ const struct vkd3d_shader_register *reg = &vsir_src->reg; -+ -+ msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ if (reg->non_uniform) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled 'non-uniform' modifier."); -+ if (vsir_src->modifiers) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); -+ -+ msl_print_register_name(msl_src->str, gen, reg); -+ if 
(reg->dimension == VSIR_DIMENSION_VEC4) -+ msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); -+} -+ -+static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) -+{ -+ vkd3d_string_buffer_release(cache, dst->mask); -+ vkd3d_string_buffer_release(cache, dst->register_name); -+} -+ -+static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) -+{ -+ uint32_t write_mask = vsir_dst->write_mask; -+ -+ if (ins->flags & VKD3DSI_PRECISE_XYZW) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled 'precise' modifier."); -+ if (vsir_dst->reg.non_uniform) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled 'non-uniform' modifier."); -+ -+ msl_dst->vsir = vsir_dst; -+ msl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); -+ msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); -+ msl_print_write_mask(msl_dst->mask, write_mask); -+ -+ return write_mask; -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( -+ struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) 
-+{ -+ va_list args; -+ -+ if (dst->vsir->shift) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); -+ if (dst->vsir->modifiers) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); -+ -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); -+ -+ va_start(args, format); -+ vkd3d_string_buffer_vprintf(gen->buffer, format, args); -+ va_end(args); -+ -+ vkd3d_string_buffer_printf(gen->buffer, ";\n"); -+} -+ -+static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "/* <unhandled instruction %#x> */\n", ins->opcode); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled instruction %#x.", ins->opcode); -+} -+ -+static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct msl_src src; -+ struct msl_dst dst; -+ uint32_t mask; -+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src, gen, &ins->src[0], mask); -+ -+ msl_print_assignment(gen, &dst, "%s", src.str->buffer); -+ -+ msl_src_cleanup(&src, &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "return;\n"); -+} -+ -+static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ gen->location = ins->location; -+ -+ switch (ins->opcode) -+ { -+ case VKD3DSIH_DCL_INPUT: -+ case VKD3DSIH_DCL_OUTPUT: -+ case VKD3DSIH_DCL_OUTPUT_SIV: -+ case 
VKD3DSIH_NOP: -+ break; -+ case VKD3DSIH_MOV: -+ msl_mov(gen, ins); -+ break; -+ case VKD3DSIH_RET: -+ msl_ret(gen, ins); -+ break; -+ default: -+ msl_unhandled(gen, ins); -+ break; -+ } -+} -+ -+static bool msl_check_shader_visibility(const struct msl_generator *gen, -+ enum vkd3d_shader_visibility visibility) -+{ -+ enum vkd3d_shader_type t = gen->program->shader_version.type; -+ -+ switch (visibility) -+ { -+ case VKD3D_SHADER_VISIBILITY_ALL: -+ return true; -+ case VKD3D_SHADER_VISIBILITY_VERTEX: -+ return t == VKD3D_SHADER_TYPE_VERTEX; -+ case VKD3D_SHADER_VISIBILITY_HULL: -+ return t == VKD3D_SHADER_TYPE_HULL; -+ case VKD3D_SHADER_VISIBILITY_DOMAIN: -+ return t == VKD3D_SHADER_TYPE_DOMAIN; -+ case VKD3D_SHADER_VISIBILITY_GEOMETRY: -+ return t == VKD3D_SHADER_TYPE_GEOMETRY; -+ case VKD3D_SHADER_VISIBILITY_PIXEL: -+ return t == VKD3D_SHADER_TYPE_PIXEL; -+ case VKD3D_SHADER_VISIBILITY_COMPUTE: -+ return t == VKD3D_SHADER_TYPE_COMPUTE; -+ default: -+ WARN("Invalid shader visibility %#x.\n", visibility); -+ return false; -+ } -+} -+ -+static bool msl_get_cbv_binding(const struct msl_generator *gen, -+ unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) -+{ -+ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; -+ const struct vkd3d_shader_resource_binding *binding; -+ unsigned int i; -+ -+ if (!interface_info) -+ return false; -+ -+ for (i = 0; i < interface_info->binding_count; ++i) -+ { -+ binding = &interface_info->bindings[i]; -+ -+ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) -+ continue; -+ if (binding->register_space != register_space) -+ continue; -+ if (binding->register_index != register_idx) -+ continue; -+ if (!msl_check_shader_visibility(gen, binding->shader_visibility)) -+ continue; -+ if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) -+ continue; -+ *binding_idx = i; -+ return true; -+ } -+ -+ return false; -+} -+ -+static void msl_generate_cbv_declaration(struct 
msl_generator *gen, -+ const struct vkd3d_shader_descriptor_info1 *cbv) -+{ -+ const struct vkd3d_shader_descriptor_binding *binding; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ unsigned int binding_idx; -+ size_t size; -+ -+ if (cbv->count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); -+ return; -+ } -+ -+ if (!msl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "No descriptor binding specified for constant buffer %u.", cbv->register_id); -+ return; -+ } -+ -+ binding = &gen->interface_info->bindings[binding_idx].binding; -+ -+ if (binding->set != 0) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); -+ return; -+ } -+ -+ if (binding->count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); -+ return; -+ } -+ -+ size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); -+ size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); -+ -+ vkd3d_string_buffer_printf(buffer, -+ "constant vkd3d_vec4 *cb_%u [[id(%u)]];", cbv->register_id, binding->binding); -+}; -+ -+static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) -+{ -+ const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_descriptor_info1 *descriptor; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ unsigned int i; -+ -+ if (!info->descriptor_count) -+ return; -+ -+ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_descriptors\n{\n", gen->prefix); -+ -+ for (i = 0; i < info->descriptor_count; ++i) -+ { -+ descriptor = 
&info->descriptors[i]; -+ -+ msl_print_indent(buffer, 1); -+ switch (descriptor->type) -+ { -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -+ msl_generate_cbv_declaration(gen, descriptor); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "/* <unhandled descriptor type %#x> */", descriptor->type); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "};\n\n"); -+} -+ -+static void msl_generate_input_struct_declarations(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->input_signature; -+ enum vkd3d_shader_type type = gen->program->shader_version.type; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_in\n{\n", gen->prefix); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ if (e->sysval_semantic) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); -+ continue; -+ } -+ -+ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); -+ continue; -+ } -+ -+ if (e->interpolation_mode != VKD3DSIM_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -+ continue; -+ } -+ -+ if(e->register_count > 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register count %u.", e->register_count); -+ 
continue; -+ } -+ -+ msl_print_indent(gen->buffer, 1); -+ -+ switch(e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "float4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "int4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uint4 "); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled component type %#x> ", e->component_type); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "shader_in_%u ", i); -+ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ vkd3d_string_buffer_printf(gen->buffer, "[[attribute(%u)]]", e->target_location); -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "};\n\n"); -+} -+ -+static void msl_generate_vertex_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) -+{ -+ switch (e->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_POSITION: -+ vkd3d_string_buffer_printf(gen->buffer, "[[position]]"); -+ break; -+ case VKD3D_SHADER_SV_NONE: -+ vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled vertex shader system value %#x.", e->sysval_semantic); -+ break; -+ } -+} -+ -+static void msl_generate_pixel_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) -+{ -+ switch (e->sysval_semantic) -+ { -+ case 
VKD3D_SHADER_SV_TARGET: -+ vkd3d_string_buffer_printf(gen->buffer, "[[color(%u)]]", e->target_location); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled pixel shader system value %#x.", e->sysval_semantic); -+ break; -+ } -+} -+ -+static void msl_generate_output_struct_declarations(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->output_signature; -+ enum vkd3d_shader_type type = gen->program->shader_version.type; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_out\n{\n", gen->prefix); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); -+ continue; -+ } -+ -+ if (e->interpolation_mode != VKD3DSIM_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -+ continue; -+ } -+ -+ if(e->register_count > 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register count %u.", e->register_count); -+ continue; -+ } -+ -+ msl_print_indent(gen->buffer, 1); -+ -+ switch(e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "float4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "int4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uint4 "); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "<unhandled component type %#x> ", 
e->component_type); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "shader_out_%u ", i); -+ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ msl_generate_vertex_output_element_attribute(gen, e); -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ msl_generate_pixel_output_element_attribute(gen, e); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "};\n\n"); -+} -+ -+static void msl_generate_entrypoint_prologue(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->input_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ vkd3d_string_buffer_printf(buffer, " %s_in[%u]", gen->prefix, e->register_index); -+ if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) -+ { -+ msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); -+ msl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); -+ msl_print_write_mask(buffer, e->mask); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, " = <unhandled sysval %#x>", e->sysval_semantic); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); -+ } -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+} -+ -+static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = 
&gen->program->output_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ switch (e->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: -+ case VKD3D_SHADER_SV_TARGET: -+ case VKD3D_SHADER_SV_POSITION: -+ vkd3d_string_buffer_printf(buffer, " output.shader_out_%u", i); -+ msl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); -+ msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); -+ msl_print_write_mask(buffer, e->mask); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, " <unhandled sysval %#x>", e->sysval_semantic); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); -+ } -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+} -+ -+static void msl_generate_entrypoint(struct msl_generator *gen) -+{ -+ enum vkd3d_shader_type type = gen->program->shader_version.type; -+ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ vkd3d_string_buffer_printf(gen->buffer, "vertex "); -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ vkd3d_string_buffer_printf(gen->buffer, "fragment "); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ return; -+ } -+ -+ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); -+ -+ if (gen->descriptor_info->descriptor_count) -+ { -+ msl_print_indent(gen->buffer, 2); -+ /* TODO: Configurable argument buffer binding location. 
*/ -+ vkd3d_string_buffer_printf(gen->buffer, -+ "constant vkd3d_%s_descriptors& descriptors [[buffer(0)]],\n", gen->prefix); -+ } -+ -+ msl_print_indent(gen->buffer, 2); -+ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input [[stage_in]])\n{\n", gen->prefix); -+ -+ /* TODO: declare #maximum_register + 1 */ -+ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_in[%u];\n", gen->prefix, 32); -+ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); -+ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); -+ -+ msl_generate_entrypoint_prologue(gen); -+ -+ vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); -+ if (gen->descriptor_info->descriptor_count) -+ vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); -+ vkd3d_string_buffer_printf(gen->buffer, ");\n"); -+ -+ msl_generate_entrypoint_epilogue(gen); -+ -+ vkd3d_string_buffer_printf(gen->buffer, " return output;\n}\n"); -+} -+ -+static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader_code *out) -+{ -+ const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -+ unsigned int i; -+ -+ MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ -+ vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); -+ -+ if (gen->program->global_flags) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)gen->program->global_flags); -+ -+ vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_vec4\n{\n"); -+ vkd3d_string_buffer_printf(gen->buffer, " uint4 u;\n"); -+ vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); -+ vkd3d_string_buffer_printf(gen->buffer, " float4 f;\n};\n\n"); -+ -+ msl_generate_descriptor_struct_declarations(gen); -+ msl_generate_input_struct_declarations(gen); -+ msl_generate_output_struct_declarations(gen); -+ -+ vkd3d_string_buffer_printf(gen->buffer, -+ "void %s_main(thread vkd3d_vec4 *v, " -+ "thread vkd3d_vec4 *o", -+ gen->prefix); -+ if (gen->descriptor_info->descriptor_count) -+ vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); -+ vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); -+ -+ ++gen->indent; -+ -+ if (gen->program->temp_count) -+ { -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_vec4 r[%u];\n\n", gen->program->temp_count); -+ } -+ -+ for (i = 0; i < instructions->count; ++i) -+ { -+ msl_handle_instruction(gen, &instructions->elements[i]); -+ } -+ -+ --gen->indent; -+ -+ vkd3d_string_buffer_printf(gen->buffer, "}\n\n"); -+ -+ msl_generate_entrypoint(gen); -+ -+ if (TRACE_ON()) -+ vkd3d_string_buffer_trace(gen->buffer); -+ -+ if (gen->failed) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ vkd3d_shader_code_from_string_buffer(out, gen->buffer); -+ -+ return VKD3D_OK; -+} -+ -+static void msl_generator_cleanup(struct msl_generator *gen) -+{ -+ vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); -+ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); -+} -+ -+static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, -+ const struct vkd3d_shader_compile_info 
*compile_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_shader_type type = program->shader_version.type; -+ -+ memset(gen, 0, sizeof(*gen)); -+ gen->program = program; -+ vkd3d_string_buffer_cache_init(&gen->string_buffers); -+ if (!(gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers))) -+ { -+ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ gen->message_context = message_context; -+ if (!(gen->prefix = msl_get_prefix(type))) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); -+ gen->descriptor_info = descriptor_info; -+ -+ return VKD3D_OK; -+} -+ -+int msl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct msl_generator generator; -+ int ret; -+ -+ if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) -+ return ret; -+ -+ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); -+ -+ if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) -+ return ret; -+ ret = msl_generator_generate(&generator, out); -+ msl_generator_cleanup(&generator); -+ -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h -index 9806614a35b..a98c8ae3df5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.h -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h -@@ -60,6 +60,7 @@ struct preproc_expansion - { - struct preproc_buffer buffer; - const 
struct preproc_text *text; -+ struct preproc_text *arg_values; - /* Back-pointer to the macro, if this expansion a macro body. This is - * necessary so that argument tokens can be correctly replaced. */ - struct preproc_macro *macro; -@@ -72,7 +73,6 @@ struct preproc_macro - - char **arg_names; - size_t arg_count; -- struct preproc_text *arg_values; - - struct preproc_text body; - }; -@@ -117,6 +117,7 @@ struct preproc_ctx - STATE_ARGS, - } state; - unsigned int paren_depth; -+ struct preproc_text *arg_values; - } text_func, directive_func; - - int current_directive; -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 2b7455a5c30..d167415c356 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -20,6 +20,7 @@ - - %{ - -+#include "preproc.h" - #include "preproc.tab.h" - - #undef ERROR /* defined in wingdi.h */ -@@ -29,11 +30,11 @@ - - #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) - --static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -+static struct preproc_expansion *preproc_get_top_expansion(struct preproc_ctx *ctx) - { - if (!ctx->expansion_count) - return NULL; -- return ctx->expansion_stack[ctx->expansion_count - 1].macro; -+ return &ctx->expansion_stack[ctx->expansion_count - 1]; - } - - static void update_location(struct preproc_ctx *ctx); -@@ -66,7 +67,7 @@ static void update_location(struct preproc_ctx *ctx); - - NEWLINE \r?\n - WS [ \t\r] --IDENTIFIER [A-Za-z_][A-Za-z0-9_]* -+IDENTIFIER (::)?[A-Za-z_]((::)?[A-Za-z0-9_]+)* - INT_SUFFIX [uUlL]{0,2} - - %% -@@ -132,14 +133,14 @@ INT_SUFFIX [uUlL]{0,2} - - if (!ctx->last_was_newline) - { -- struct preproc_macro *macro; -+ struct preproc_expansion *exp; - - /* Stringification is only done for function-like macro bodies. - * Anywhere else, we need to parse it as two separate tokens. 
- * We could use a state for this, but yyless() is easier and cheap. - */ - -- if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ if ((exp = preproc_get_top_expansion(ctx)) && exp->macro && exp->macro->arg_count) - return T_HASHSTRING; - - yyless(1); -@@ -258,6 +259,12 @@ static void preproc_pop_buffer(struct preproc_ctx *ctx) - - yy_delete_buffer(exp->buffer.lexer_buffer, ctx->scanner); - -+ if (exp->macro) -+ { -+ for (unsigned int i = 0; i < exp->macro->arg_count; ++i) -+ vkd3d_string_buffer_cleanup(&exp->arg_values[i].text); -+ free(exp->arg_values); -+ } - --ctx->expansion_count; - TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); - } -@@ -310,15 +317,15 @@ static int return_token(int token, YYSTYPE *lval, const char *text) - - static const struct preproc_text *find_arg_expansion(struct preproc_ctx *ctx, const char *s) - { -- struct preproc_macro *macro; -+ struct preproc_expansion *exp; - unsigned int i; - -- if ((macro = preproc_get_top_macro(ctx))) -+ if ((exp = preproc_get_top_expansion(ctx)) && exp->macro) - { -- for (i = 0; i < macro->arg_count; ++i) -+ for (i = 0; i < exp->macro->arg_count; ++i) - { -- if (!strcmp(s, macro->arg_names[i])) -- return &macro->arg_values[i]; -+ if (!strcmp(s, exp->macro->arg_names[i])) -+ return &exp->arg_values[i]; - } - } - return NULL; -@@ -330,7 +337,7 @@ static void preproc_text_add(struct preproc_text *text, const char *string) - } - - static bool preproc_push_expansion(struct preproc_ctx *ctx, -- const struct preproc_text *text, struct preproc_macro *macro) -+ const struct preproc_text *text, struct preproc_macro *macro, struct preproc_text *arg_values) - { - struct preproc_expansion *exp; - -@@ -342,6 +349,7 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, - exp->buffer.lexer_buffer = yy_scan_bytes(text->text.buffer, text->text.content_size, ctx->scanner); - exp->buffer.location = text->location; - exp->macro = macro; -+ exp->arg_values = arg_values; - TRACE("Expansion stack 
size is now %zu.\n", ctx->expansion_count); - return true; - } -@@ -542,7 +550,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - if ((expansion = find_arg_expansion(ctx, text))) - { -- preproc_push_expansion(ctx, expansion, NULL); -+ preproc_push_expansion(ctx, expansion, NULL, NULL); - continue; - } - -@@ -550,7 +558,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - if (!macro->arg_count) - { -- preproc_push_expansion(ctx, &macro->body, macro); -+ preproc_push_expansion(ctx, &macro->body, macro, NULL); - } - else - { -@@ -616,16 +624,19 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - case STATE_IDENTIFIER: - if (token == '(') - { -- struct preproc_text *first_arg = &func_state->macro->arg_values[0]; -- unsigned int i; -+ struct preproc_text *arg_values; -+ -+ if (!(arg_values = calloc(func_state->macro->arg_count, sizeof(*arg_values)))) -+ return 0; -+ -+ for (unsigned int i = 0; i < func_state->macro->arg_count; ++i) -+ vkd3d_string_buffer_init(&arg_values[i].text); -+ arg_values[0].location = *lloc; - - func_state->arg_count = 0; - func_state->paren_depth = 1; - func_state->state = STATE_ARGS; -- for (i = 0; i < func_state->macro->arg_count; ++i) -- func_state->macro->arg_values[i].text.content_size = 0; -- -- first_arg->location = *lloc; -+ func_state->arg_values = arg_values; - } - else - { -@@ -649,7 +660,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - VKD3D_ASSERT(func_state->macro->arg_count); - - if (func_state->arg_count < func_state->macro->arg_count) -- current_arg = &func_state->macro->arg_values[func_state->arg_count]; -+ current_arg = &func_state->arg_values[func_state->arg_count]; - - switch (token) - { -@@ -664,7 +675,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - if ((expansion = find_arg_expansion(ctx, text))) - { -- preproc_push_expansion(ctx, expansion, NULL); -+ preproc_push_expansion(ctx, expansion, NULL, NULL); - continue; - } - -@@ -700,7 +711,8 @@ int 
yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - if (++func_state->arg_count == func_state->macro->arg_count) - { -- preproc_push_expansion(ctx, &func_state->macro->body, func_state->macro); -+ preproc_push_expansion(ctx, &func_state->macro->body, -+ func_state->macro, func_state->arg_values); - } - else - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y -index 366e351e3b5..c6be17bd230 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.y -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y -@@ -91,7 +91,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati - size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body) - { - struct preproc_macro *macro; -- unsigned int i; - int ret; - - if ((macro = preproc_find_macro(ctx, name))) -@@ -108,14 +107,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati - macro->name = name; - macro->arg_names = arg_names; - macro->arg_count = arg_count; -- macro->arg_values = NULL; -- if (arg_count && !(macro->arg_values = vkd3d_calloc(arg_count, sizeof(*macro->arg_values)))) -- { -- vkd3d_free(macro); -- return false; -- } -- for (i = 0; i < arg_count; ++i) -- vkd3d_string_buffer_init(&macro->arg_values[i].text); - macro->body.text = *body; - macro->body.location = *body_loc; - ret = rb_put(&ctx->macros, name, &macro->entry); -@@ -129,12 +120,8 @@ void preproc_free_macro(struct preproc_macro *macro) - - vkd3d_free(macro->name); - for (i = 0; i < macro->arg_count; ++i) -- { -- vkd3d_string_buffer_cleanup(&macro->arg_values[i].text); - vkd3d_free(macro->arg_names[i]); -- } - vkd3d_free(macro->arg_names); -- vkd3d_free(macro->arg_values); - vkd3d_string_buffer_cleanup(&macro->body.text); - vkd3d_free(macro); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 49979ab2491..81555e702ec 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -97,15 +97,37 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co - if (!(spvret = spvBinaryToText(context, spirv->code, spirv->size / sizeof(uint32_t), - get_binary_to_text_options(formatting), &text, &diagnostic))) - { -- void *code = vkd3d_malloc(text->length); -- if (code) -+ const char *p, *q, *end, *pad, *truncate; -+ struct vkd3d_string_buffer buffer; -+ size_t line_len; -+ -+ vkd3d_string_buffer_init(&buffer); -+ -+ for (p = text->str, end = p + text->length; p < end; p = q) - { -- memcpy(code, text->str, text->length); -- out->size = text->length; -- out->code = code; -+ if (!(q = memchr(p, '\n', end - p))) -+ q = end; -+ else -+ ++q; -+ -+ /* FIXME: Note that when colour output is enabled, we count colour -+ * escape codes towards the line length. It's possible to fix -+ * that, but not completely trivial. */ -+ for (pad = "", line_len = 100; q - p > line_len; line_len = 100 - strlen(pad)) -+ { -+ if (!(truncate = memchr(p + line_len, ' ', q - p - line_len))) -+ break; -+ vkd3d_string_buffer_printf(&buffer, "%s%.*s\n", pad, (int)(truncate - p), p); -+ p = truncate + 1; -+ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) -+ pad = " "; -+ else -+ pad = " "; -+ } -+ vkd3d_string_buffer_printf(&buffer, "%s%.*s", pad, (int)(q - p), p); - } -- else -- result = VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vkd3d_shader_code_from_string_buffer(out, &buffer); - } - else - { -@@ -228,7 +250,7 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - #define VKD3D_SPIRV_VERSION_1_0 0x00010000 - #define VKD3D_SPIRV_VERSION_1_3 0x00010300 - #define VKD3D_SPIRV_GENERATOR_ID 18 --#define VKD3D_SPIRV_GENERATOR_VERSION 13 -+#define VKD3D_SPIRV_GENERATOR_VERSION 14 - #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) - - struct vkd3d_spirv_stream -@@ -277,6 +299,16 @@ static void vkd3d_spirv_stream_free(struct 
vkd3d_spirv_stream *stream) - vkd3d_spirv_stream_clear(stream); - } - -+static void vkd3d_shader_code_from_spirv_stream(struct vkd3d_shader_code *code, struct vkd3d_spirv_stream *stream) -+{ -+ code->code = stream->words; -+ code->size = stream->word_count * sizeof(*stream->words); -+ -+ stream->words = NULL; -+ stream->capacity = 0; -+ stream->word_count = 0; -+} -+ - static size_t vkd3d_spirv_stream_current_location(struct vkd3d_spirv_stream *stream) - { - return stream->word_count; -@@ -362,6 +394,7 @@ struct vkd3d_spirv_builder - uint32_t type_bool_id; - uint32_t type_void_id; - uint32_t scope_subgroup_id; -+ uint32_t numeric_type_ids[VKD3D_SHADER_COMPONENT_TYPE_COUNT][VKD3D_VEC4_SIZE]; - - struct vkd3d_spirv_stream debug_stream; /* debug instructions */ - struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ -@@ -1195,6 +1228,13 @@ static uint32_t vkd3d_spirv_build_op_constant_composite(struct vkd3d_spirv_build - SpvOpConstantComposite, result_type, constituents, constituent_count); - } - -+static uint32_t vkd3d_spirv_build_op_spec_constant_composite(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) -+{ -+ return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, -+ SpvOpSpecConstantComposite, result_type, constituents, constituent_count); -+} -+ - static uint32_t vkd3d_spirv_get_op_constant_composite(struct vkd3d_spirv_builder *builder, - uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) - { -@@ -1870,29 +1910,41 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build - static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - enum vkd3d_shader_component_type component_type, unsigned int component_count) - { -- uint32_t scalar_id; -+ uint32_t scalar_id, type_id; -+ -+ VKD3D_ASSERT(component_type < VKD3D_SHADER_COMPONENT_TYPE_COUNT); -+ if (!component_count || component_count > 
VKD3D_VEC4_SIZE) -+ { -+ ERR("Invalid component count %u.\n", component_count); -+ return 0; -+ } -+ -+ if ((type_id = builder->numeric_type_ids[component_type][component_count - 1])) -+ return type_id; - - if (component_count == 1) - { - switch (component_type) - { - case VKD3D_SHADER_COMPONENT_VOID: -- return vkd3d_spirv_get_op_type_void(builder); -+ type_id = vkd3d_spirv_get_op_type_void(builder); - break; - case VKD3D_SHADER_COMPONENT_FLOAT: -- return vkd3d_spirv_get_op_type_float(builder, 32); -+ type_id = vkd3d_spirv_get_op_type_float(builder, 32); - break; - case VKD3D_SHADER_COMPONENT_INT: - case VKD3D_SHADER_COMPONENT_UINT: -- return vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT); -+ type_id = vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT); - break; - case VKD3D_SHADER_COMPONENT_BOOL: -- return vkd3d_spirv_get_op_type_bool(builder); -+ type_id = vkd3d_spirv_get_op_type_bool(builder); - break; - case VKD3D_SHADER_COMPONENT_DOUBLE: -- return vkd3d_spirv_get_op_type_float(builder, 64); -+ type_id = vkd3d_spirv_get_op_type_float(builder, 64); -+ break; - case VKD3D_SHADER_COMPONENT_UINT64: -- return vkd3d_spirv_get_op_type_int(builder, 64, 0); -+ type_id = vkd3d_spirv_get_op_type_int(builder, 64, 0); -+ break; - default: - FIXME("Unhandled component type %#x.\n", component_type); - return 0; -@@ -1902,46 +1954,21 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - { - VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); - scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); -- return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); -+ type_id = vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); - } -+ -+ builder->numeric_type_ids[component_type][component_count - 1] = type_id; -+ -+ return type_id; - } - - static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, 
- enum vkd3d_data_type data_type, unsigned int component_count) - { -- uint32_t scalar_id; -+ enum vkd3d_shader_component_type component_type; - -- if (component_count == 1) -- { -- switch (data_type) -- { -- case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ -- case VKD3D_DATA_FLOAT: -- case VKD3D_DATA_SNORM: -- case VKD3D_DATA_UNORM: -- return vkd3d_spirv_get_op_type_float(builder, 32); -- break; -- case VKD3D_DATA_INT: -- case VKD3D_DATA_UINT: -- case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ -- return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); -- break; -- case VKD3D_DATA_DOUBLE: -- return vkd3d_spirv_get_op_type_float(builder, 64); -- case VKD3D_DATA_UINT64: -- return vkd3d_spirv_get_op_type_int(builder, 64, 0); -- case VKD3D_DATA_BOOL: -- return vkd3d_spirv_get_op_type_bool(builder); -- default: -- FIXME("Unhandled data type %#x.\n", data_type); -- return 0; -- } -- } -- else -- { -- scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); -- return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); -- } -+ component_type = vkd3d_component_type_from_data_type(data_type); -+ return vkd3d_spirv_get_type_id(builder, component_type, component_count); - } - - static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) -@@ -1996,9 +2023,7 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - { - uint64_t capability_mask = builder->capability_mask; - struct vkd3d_spirv_stream stream; -- uint32_t *code; - unsigned int i; -- size_t size; - - vkd3d_spirv_stream_init(&stream); - -@@ -2053,26 +2078,20 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - if (builder->invocation_count) - vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, - builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1); -- vkd3d_spirv_stream_append(&stream, 
&builder->execution_mode_stream); -- -- vkd3d_spirv_stream_append(&stream, &builder->debug_stream); -- vkd3d_spirv_stream_append(&stream, &builder->annotation_stream); -- vkd3d_spirv_stream_append(&stream, &builder->global_stream); -- vkd3d_spirv_stream_append(&stream, &builder->function_stream); - -- if (!(code = vkd3d_calloc(stream.word_count, sizeof(*code)))) -+ if (!vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream) -+ || !vkd3d_spirv_stream_append(&stream, &builder->debug_stream) -+ || !vkd3d_spirv_stream_append(&stream, &builder->annotation_stream) -+ || !vkd3d_spirv_stream_append(&stream, &builder->global_stream) -+ || !vkd3d_spirv_stream_append(&stream, &builder->function_stream)) - { - vkd3d_spirv_stream_free(&stream); - return false; - } - -- size = stream.word_count * sizeof(*code); -- memcpy(code, stream.words, size); -+ vkd3d_shader_code_from_spirv_stream(spirv, &stream); - vkd3d_spirv_stream_free(&stream); - -- spirv->code = code; -- spirv->size = size; -- - return true; - } - -@@ -2083,28 +2102,26 @@ static const struct vkd3d_spirv_resource_type - SpvDim dim; - uint32_t arrayed; - uint32_t ms; -- - unsigned int coordinate_component_count; -- unsigned int offset_component_count; - - SpvCapability capability; - SpvCapability uav_capability; - } - vkd3d_spirv_resource_type_table[] = - { -- {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, 0, -+ {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, - SpvCapabilitySampledBuffer, SpvCapabilityImageBuffer}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, 1, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, - SpvCapabilitySampled1D, SpvCapabilityImage1D}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3, 3}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 0}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, 1, -+ 
{VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, - SpvCapabilitySampled1D, SpvCapabilityImage1D}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 0, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, - SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray}, - }; - -@@ -2647,8 +2664,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { - compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); -- compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count -- && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY; - - compiler->shader_interface = *shader_interface; - if (shader_interface->push_constant_buffer_count) -@@ -2675,6 +2690,11 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - } - } - -+ if (compiler->shader_type == VKD3D_SHADER_TYPE_VERTEX) -+ compiler->emit_point_size = true; -+ else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) -+ compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count; -+ - compiler->scan_descriptor_info = scan_descriptor_info; - - compiler->phase = VKD3DSIH_INVALID; -@@ -3174,6 +3194,14 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_CONSTBUFFER: - snprintf(buffer, buffer_size, "cb%u_%u", reg->idx[0].offset, 
reg->idx[1].offset); - break; -+ case VKD3DSPR_RASTOUT: -+ if (idx == VSIR_RASTOUT_POINT_SIZE) -+ { -+ snprintf(buffer, buffer_size, "oPts"); -+ break; -+ } -+ FIXME("Unhandled rastout register %#x.\n", idx); -+ return false; - case VKD3DSPR_INPUT: - snprintf(buffer, buffer_size, "v%u", idx); - break; -@@ -3234,6 +3262,9 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_WAVELANEINDEX: - snprintf(buffer, buffer_size, "vWaveLaneIndex"); - break; -+ case VKD3DSPR_POINT_COORD: -+ snprintf(buffer, buffer_size, "vPointCoord"); -+ break; - default: - FIXME("Unhandled register %#x.\n", reg->type); - snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); -@@ -3252,18 +3283,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * - vkd3d_spirv_build_op_name(builder, id, "%s", debug_name); - } - --static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, -- struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, -- enum vkd3d_shader_component_type component_type, unsigned int component_count) --{ -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t type_id, ptr_type_id; -- -- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); -- ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); -- return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); --} -- - static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, - struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count, -@@ -3273,10 +3292,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - uint32_t type_id, length_id, ptr_type_id; - unsigned int i; - -- if (!length_count) -- return spirv_compiler_emit_variable(compiler, -- stream, storage_class, component_type, 
component_count); -- - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - for (i = 0; i < length_count; ++i) - { -@@ -3290,6 +3305,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); - } - -+static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, -+ struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, -+ enum vkd3d_shader_component_type component_type, unsigned int component_count) -+{ -+ return spirv_compiler_emit_array_variable(compiler, stream, storage_class, -+ component_type, component_count, NULL, 0); -+} -+ - static const struct vkd3d_spec_constant_info - { - enum vkd3d_shader_parameter_name name; -@@ -3316,8 +3339,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_ - return NULL; - } - --static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *compiler) -+static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *compiler, unsigned int count) - { -+ uint32_t ret; -+ - if (!compiler->current_spec_constant_id) - { - unsigned int i, id = 0; -@@ -3327,28 +3352,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com - const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i]; - - if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) -- id = max(current->u.specialization_constant.id + 1, id); -+ { -+ switch (current->data_type) -+ { -+ case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4: -+ id = max(current->u.specialization_constant.id + 4, id); -+ break; -+ -+ default: -+ id = max(current->u.specialization_constant.id + 1, id); -+ break; -+ } -+ } - } - - compiler->current_spec_constant_id = id; - } - -- return compiler->current_spec_constant_id++; -+ ret = compiler->current_spec_constant_id; -+ compiler->current_spec_constant_id += 
count; -+ return ret; - } - - static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) -+ enum vkd3d_shader_parameter_name name, uint32_t spec_id, -+ enum vkd3d_data_type type, unsigned int component_count) - { -+ uint32_t scalar_type_id, vector_type_id, id, default_value, components[4]; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_spec_constant_info *info; -- uint32_t type_id, id, default_value; - - info = get_spec_constant_info(name); - default_value = info ? info->default_value : 0; - -- type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); -- id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); -+ scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); -+ vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); -+ -+ for (unsigned int i = 0; i < component_count; ++i) -+ { -+ components[i] = vkd3d_spirv_build_op_spec_constant(builder, scalar_type_id, default_value); -+ vkd3d_spirv_build_op_decorate1(builder, components[i], SpvDecorationSpecId, spec_id + i); -+ } -+ -+ if (component_count == 1) -+ id = components[0]; -+ else -+ id = vkd3d_spirv_build_op_spec_constant_composite(builder, vector_type_id, components, component_count); - - if (info) - vkd3d_spirv_build_op_name(builder, id, "%s", info->debug_name); -@@ -3365,7 +3414,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile - } - - static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) -+ enum vkd3d_shader_parameter_name name, uint32_t spec_id, -+ enum vkd3d_data_type type, unsigned int 
component_count) - { - unsigned int i; - -@@ -3375,17 +3425,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler - return compiler->spec_constants[i].id; - } - -- return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); -+ return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type, component_count); - } - - static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, -- const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) -+ const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type, unsigned int component_count) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int index = parameter - compiler->program->parameters; - uint32_t type_id, ptr_id, ptr_type_id; - -- type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); -+ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); - ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, - compiler->spirv_parameter_info[index].buffer_id, -@@ -3393,48 +3443,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi - return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); - } - -+static const struct -+{ -+ enum vkd3d_data_type type; -+ unsigned int component_count; -+} -+parameter_data_type_map[] = -+{ -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT, 1}, -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT, 1}, -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4] = {VKD3D_DATA_FLOAT, 4}, -+}; -+ - static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type) -+ enum vkd3d_shader_parameter_name name, enum 
vkd3d_data_type type, unsigned int component_count) - { - const struct vkd3d_shader_parameter1 *parameter; - -- static const struct -- { -- enum vkd3d_data_type type; -- } -- type_map[] = -- { -- [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT}, -- [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT}, -- }; -- - if (!(parameter = vsir_program_get_parameter(compiler->program, name))) - { - WARN("Unresolved shader parameter %#x.\n", name); - goto default_parameter; - } - -- if (type_map[parameter->data_type].type != type) -- ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type); -+ if (parameter_data_type_map[parameter->data_type].type != type -+ || parameter_data_type_map[parameter->data_type].component_count != component_count) -+ ERR("Expected type %#x, count %u for parameter %#x, got %#x.\n", -+ type, component_count, name, parameter->data_type); - - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -- { -- if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -- return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); -- else -- return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); -- } -+ return spirv_compiler_get_constant(compiler, vkd3d_component_type_from_data_type(type), -+ component_count, (const uint32_t *)&parameter->u.immediate_constant); - - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) -- return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); -+ return spirv_compiler_get_spec_constant(compiler, name, -+ parameter->u.specialization_constant.id, type, component_count); - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) -- return spirv_compiler_get_buffer_parameter(compiler, parameter, type); -+ return spirv_compiler_get_buffer_parameter(compiler, parameter, type, component_count); - - FIXME("Unhandled 
parameter type %#x.\n", parameter->type); - - default_parameter: - return spirv_compiler_get_spec_constant(compiler, -- name, spirv_compiler_alloc_spec_constant_id(compiler), type); -+ name, spirv_compiler_alloc_spec_constant_id(compiler, component_count), type, component_count); - } - - static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, -@@ -4210,7 +4261,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - else if (reg->type == VKD3DSPR_UNDEF) - return spirv_compiler_emit_load_undef(compiler, reg, write_mask); - else if (reg->type == VKD3DSPR_PARAMETER) -- return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type); -+ return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, -+ reg->data_type, reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1); - - component_count = vsir_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -4500,9 +4552,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, - static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, uint32_t val_id) - { -- VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); -- if (dst->modifiers & VKD3DSPDM_SATURATE) -+ uint32_t modifiers = dst->modifiers; -+ -+ /* It is always legitimate to ignore _pp. 
*/ -+ modifiers &= ~VKD3DSPDM_PARTIALPRECISION; -+ -+ if (modifiers & VKD3DSPDM_SATURATE) -+ { - val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); -+ modifiers &= ~VKD3DSPDM_SATURATE; -+ } -+ -+ if (dst->modifiers & VKD3DSPDM_MSAMPCENTROID) -+ { -+ FIXME("Ignoring _centroid modifier.\n"); -+ modifiers &= ~VKD3DSPDM_MSAMPCENTROID; -+ } -+ -+ VKD3D_ASSERT(!modifiers); - - spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, val_id); - } -@@ -4809,6 +4876,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = - { - VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, - }; -+static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = -+{ -+ VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize, -+}; - static const struct - { - enum vkd3d_shader_register_type reg_type; -@@ -4828,6 +4899,8 @@ vkd3d_register_builtins[] = - - {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, - -+ {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, -+ - {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - -@@ -5398,7 +5471,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); - VKD3D_ASSERT(reg->idx_count < 2); - -- if (!(builtin = get_spirv_builtin_for_register(reg->type))) -+ if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ { -+ builtin = &vkd3d_output_point_size_builtin; -+ } -+ else if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { - FIXME("Unhandled register %#x.\n", reg->type); - return; -@@ -5451,7 +5528,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - const struct shader_signature *shader_signature; - const struct 
vkd3d_spirv_builtin *builtin; - enum vkd3d_shader_sysval_semantic sysval; -- uint32_t write_mask, reg_write_mask; -+ uint32_t write_mask; - bool use_private_variable = false; - struct vkd3d_symbol reg_symbol; - SpvStorageClass storage_class; -@@ -5502,7 +5579,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - use_private_variable = true; - } - -- reg_write_mask = write_mask >> component_idx; - vkd3d_symbol_make_io(®_symbol, reg_type, element_idx); - - if (rb_get(&compiler->symbol_table, ®_symbol)) -@@ -5580,7 +5656,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, -- use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); -+ use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); - reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; - VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); - -@@ -5591,7 +5667,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - if (use_private_variable) - { - compiler->private_output_variable[element_idx] = var_id; -- compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; -+ compiler->private_output_variable_write_mask[element_idx] |= write_mask >> component_idx; - if (!compiler->epilogue_function_id) - compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); - } -@@ -5846,11 +5922,8 @@ static size_t spirv_compiler_get_current_function_location(struct spirv_compiler - return builder->main_function_location; - } - --static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, enum vsir_global_flags flags) - { -- enum vkd3d_shader_global_flags flags 
= instruction->declaration.global_flags; -- - if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) - { - spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0); -@@ -6120,12 +6193,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, - static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, - SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, - const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, -- bool is_uav, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) -+ const struct vkd3d_shader_descriptor_info1 *descriptor, bool is_uav_counter, -+ struct vkd3d_descriptor_variable_info *var_info) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_descriptor_binding_address binding_address; - struct vkd3d_shader_descriptor_binding binding; -- const struct vkd3d_shader_descriptor_info1 *d; - uint32_t array_type_id, ptr_type_id, var_id; - bool write_only = false, coherent = false; - struct vkd3d_symbol symbol; -@@ -6135,12 +6208,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - resource_type, is_uav_counter, &binding_address); - var_info->binding_base_idx = binding_address.binding_base_idx; - -- if (is_uav) -+ if (descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && !is_uav_counter) - { -- d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); -- write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); -+ write_only = !(descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); - /* ROVs are implicitly globally coherent. 
*/ -- coherent = d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); -+ coherent = descriptor->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); - } - - if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u -@@ -6194,11 +6266,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - } - - static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, -- const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) -+ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; - const SpvStorageClass storage_class = SpvStorageClassUniform; -+ unsigned int size_in_bytes = descriptor->buffer_size; - struct vkd3d_push_constant_buffer_binding *push_cb; - struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_shader_register reg; -@@ -6206,7 +6279,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, - unsigned int size; - - vsir_register_init(®, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3); -- reg.idx[0].offset = register_id; -+ reg.idx[0].offset = descriptor->register_id; - reg.idx[1].offset = range->first; - reg.idx[2].offset = range->last; - -@@ -6239,7 +6312,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); - - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, -- ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info); -+ ®, range, VKD3D_SHADER_RESOURCE_BUFFER, descriptor, false, &var_info); - - vkd3d_symbol_make_register(®_symbol, ®); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -6275,7 +6348,7 
@@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi - } - - static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, -- const struct vkd3d_shader_register_range *range, unsigned int register_id) -+ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) - { - const SpvStorageClass storage_class = SpvStorageClassUniformConstant; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -@@ -6285,7 +6358,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi - uint32_t type_id, var_id; - - vsir_register_init(®, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); -- reg.idx[0].offset = register_id; -+ reg.idx[0].offset = descriptor->register_id; - - vkd3d_symbol_make_sampler(®_symbol, ®); - reg_symbol.info.sampler.range = *range; -@@ -6295,8 +6368,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi - return; - - type_id = vkd3d_spirv_get_op_type_sampler(builder); -- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, -- range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info); -+ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, -+ ®, range, VKD3D_SHADER_RESOURCE_NONE, descriptor, false, &var_info); - - vkd3d_symbol_make_register(®_symbol, ®); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -6346,7 +6419,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty - static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, - const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, -- bool raw_structured, uint32_t depth) -+ bool raw_structured) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const 
struct vkd3d_shader_descriptor_info1 *d; -@@ -6369,7 +6442,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler - - sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1); - return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, -- depth, resource_type_info->arrayed, resource_type_info->ms, -+ 2, resource_type_info->arrayed, resource_type_info->ms, - reg->type == VKD3DSPR_UAV ? 2 : 1, format); - } - -@@ -6384,18 +6457,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - const struct vkd3d_shader_combined_resource_sampler *current; - uint32_t image_type_id, type_id, ptr_type_id, var_id; - enum vkd3d_shader_binding_flag resource_type_flag; -- const struct vkd3d_shader_descriptor_info1 *d; - struct vkd3d_symbol symbol; - unsigned int i; -- bool depth; - - resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER - ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - - for (i = 0; i < shader_interface->combined_sampler_count; ++i) - { -- struct vkd3d_shader_register_range sampler_range; -- - current = &shader_interface->combined_samplers[i]; - - if (current->resource_space != resource_range->space || current->resource_index != resource_range->first) -@@ -6417,16 +6486,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - current->sampler_space, current->binding.count); - } - -- sampler_range.space = current->sampler_space; -- sampler_range.first = current->sampler_index; -- sampler_range.last = current->sampler_index; -- d = spirv_compiler_get_descriptor_info(compiler, -- VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler_range); -- depth = current->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX -- && (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE); -- - image_type_id = spirv_compiler_get_image_type_id(compiler, resource, resource_range, -- resource_type_info, sampled_type, 
structure_stride || raw, depth); -+ resource_type_info, sampled_type, structure_stride || raw); - type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image_type_id); - - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); -@@ -6461,21 +6522,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - } - - static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, -- const struct vkd3d_shader_register_range *range, unsigned int register_id, -- unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, -- enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) -+ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) - { -+ bool raw = descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; -+ enum vkd3d_shader_resource_type resource_type = descriptor->resource_type; - struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; -+ bool is_uav = descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; -+ unsigned int structure_stride = descriptor->structure_stride / 4; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - SpvStorageClass storage_class = SpvStorageClassUniformConstant; - uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; - const struct vkd3d_spirv_resource_type *resource_type_info; -+ unsigned int sample_count = descriptor->sample_count; - enum vkd3d_shader_component_type sampled_type; - struct vkd3d_symbol resource_symbol; - struct vkd3d_shader_register reg; - - vsir_register_init(®, is_uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_FLOAT, 1); -- reg.idx[0].offset = register_id; -+ reg.idx[0].offset = descriptor->register_id; - - if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) - resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -@@ -6489,7 +6553,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - return; - } - -- sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); -+ sampled_type = vkd3d_component_type_from_resource_data_type(descriptor->resource_data_type); - - if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) - { -@@ -6517,19 +6581,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - else - { - type_id = spirv_compiler_get_image_type_id(compiler, ®, range, -- resource_type_info, sampled_type, structure_stride || raw, 0); -+ resource_type_info, sampled_type, structure_stride || raw); - } - -- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, -- range, resource_type, is_uav, false, &var_info); -+ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, -+ type_id, ®, range, resource_type, descriptor, false, &var_info); - - if (is_uav) - { -- const struct vkd3d_shader_descriptor_info1 *d; -- -- d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); -- -- if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) -+ if (descriptor->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) - { - if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -@@ -6543,7 +6603,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - compiler->use_invocation_interlock = true; - } - -- if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) -+ if (descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) - { - 
VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ - -@@ -6571,7 +6631,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - - counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, -- type_id, ®, range, resource_type, false, true, &counter_var_info); -+ type_id, ®, range, resource_type, descriptor, true, &counter_var_info); - } - } - -@@ -6709,7 +6769,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi - - static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) - { -- static const struct vkd3d_spirv_builtin point_size = {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize}; -+ if (compiler->program->has_point_size) -+ return; - - /* Set the point size. Point sprites are not supported in d3d10+, but - * point primitives can still be used with e.g. stream output. Vulkan -@@ -6723,7 +6784,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) - || compiler->write_tess_geom_point_size) - { - vkd3d_spirv_build_op_store(&compiler->spirv_builder, -- spirv_compiler_emit_builtin_variable(compiler, &point_size, SpvStorageClassOutput, 0), -+ spirv_compiler_emit_builtin_variable(compiler, -+ &vkd3d_output_point_size_builtin, SpvStorageClassOutput, 0), - spirv_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone); - } - } -@@ -6760,15 +6822,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler - compiler->spirv_builder.invocation_count = instruction->declaration.count; - } - --static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, -+ enum vkd3d_tessellator_domain domain) - { -- enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; - SpvExecutionMode 
mode; - -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) -- return; -- - switch (domain) - { - case VKD3D_TESSELLATOR_DOMAIN_LINE: -@@ -6845,10 +6903,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler * - spirv_compiler_emit_execution_mode(compiler, mode, NULL, 0); - } - --static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compiler, -+ const struct vsir_thread_group_size *group_size) - { -- const struct vkd3d_shader_thread_group_size *group_size = &instruction->declaration.thread_group_size; - const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z}; - - spirv_compiler_emit_execution_mode(compiler, -@@ -7391,7 +7448,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - uint32_t components[VKD3D_VEC4_SIZE]; - - if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA -- || dst->modifiers || src->modifiers) -+ || src->reg.type == VKD3DSPR_PARAMETER || dst->modifiers || src->modifiers) - goto general_implementation; - - spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); -@@ -8433,11 +8490,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t sampler_var_id, sampler_id, sampled_image_type_id; - const struct vkd3d_symbol *symbol = NULL; -- bool load, sampled, depth_comparison; -+ bool load, sampled; - - load = !(flags & VKD3D_IMAGE_FLAG_NO_LOAD); - sampled = flags & VKD3D_IMAGE_FLAG_SAMPLED; -- depth_comparison = flags & VKD3D_IMAGE_FLAG_DEPTH; - - if (resource_reg->type == VKD3DSPR_RESOURCE) - symbol = spirv_compiler_find_combined_sampler(compiler, resource_reg, sampler_reg); -@@ -8491,7 +8547,7 @@ static void 
spirv_compiler_prepare_image(struct spirv_compiler *compiler, - - image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg, - &symbol->info.resource.range, image->resource_type_info, -- image->sampled_type, image->structure_stride || image->raw, depth_comparison); -+ image->sampled_type, image->structure_stride || image->raw); - - if (sampled) - { -@@ -8535,9 +8591,11 @@ static uint32_t spirv_compiler_emit_texel_offset(struct spirv_compiler *compiler - const struct vkd3d_shader_instruction *instruction, - const struct vkd3d_spirv_resource_type *resource_type_info) - { -+ unsigned int component_count = resource_type_info->coordinate_component_count - resource_type_info->arrayed; - const struct vkd3d_shader_texel_offset *offset = &instruction->texel_offset; -- unsigned int component_count = resource_type_info->offset_component_count; - int32_t data[4] = {offset->u, offset->v, offset->w, 0}; -+ -+ VKD3D_ASSERT(resource_type_info->dim != SpvDimCube); - return spirv_compiler_get_constant(compiler, - VKD3D_SHADER_COMPONENT_INT, component_count, (const uint32_t *)data); - } -@@ -8622,9 +8680,9 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - const struct vkd3d_shader_src_param *resource, *sampler; -+ unsigned int image_operand_count = 0, component_count; - uint32_t sampled_type_id, coordinate_id, val_id; - SpvImageOperandsMask operands_mask = 0; -- unsigned int image_operand_count = 0; - struct vkd3d_shader_image image; - uint32_t image_operands[3]; - uint32_t coordinate_mask; -@@ -8649,7 +8707,8 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - case VKD3DSIH_SAMPLE_GRAD: - op = SpvOpImageSampleExplicitLod; - operands_mask |= SpvImageOperandsGradMask; -- coordinate_mask = (1u << image.resource_type_info->offset_component_count) - 1; -+ component_count = 
image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed; -+ coordinate_mask = (1u << component_count) - 1; - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, - &src[3], coordinate_mask); - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -@@ -8738,10 +8797,10 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED; -+ unsigned int component_count, component_idx; - SpvImageOperandsMask operands_mask = 0; - unsigned int image_operand_count = 0; - struct vkd3d_shader_image image; -- unsigned int component_idx; - uint32_t image_operands[1]; - uint32_t coordinate_mask; - bool extended_offset; -@@ -8763,10 +8822,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - - if (offset) - { -+ component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed; -+ VKD3D_ASSERT(image.resource_type_info->dim != SpvDimCube); - vkd3d_spirv_enable_capability(builder, SpvCapabilityImageGatherExtended); - operands_mask |= SpvImageOperandsOffsetMask; - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -- offset, (1u << image.resource_type_info->offset_component_count) - 1); -+ offset, (1u << component_count) - 1); - } - else if (vkd3d_shader_instruction_has_texel_offset(instruction)) - { -@@ -8842,15 +8903,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler - uint32_t base_coordinate_id, component_idx; - uint32_t constituents[VKD3D_VEC4_SIZE]; - struct vkd3d_shader_image image; -+ bool storage_buffer_uav = false; - uint32_t indices[2]; - unsigned int i, j; - SpvOp op; - - resource = &src[instruction->src_count - 1]; -- resource_symbol = 
spirv_compiler_find_resource(compiler, &resource->reg); - -- if (resource->reg.type == VKD3DSPR_UAV -- && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) -+ if (resource->reg.type == VKD3DSPR_UAV) -+ { -+ resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); -+ storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource); -+ } -+ -+ if (storage_buffer_uav) - { - texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id); -@@ -9569,7 +9635,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co - if (src->reg.type == VKD3DSPR_RASTERIZER) - { - val_id = spirv_compiler_emit_shader_parameter(compiler, -- VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT); -+ VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT, 1); - } - else - { -@@ -10132,9 +10198,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - - switch (instruction->opcode) - { -- case VKD3DSIH_DCL_GLOBAL_FLAGS: -- spirv_compiler_emit_dcl_global_flags(compiler, instruction); -- break; - case VKD3DSIH_DCL_INDEXABLE_TEMP: - spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); - break; -@@ -10172,9 +10235,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - spirv_compiler_emit_output_vertex_count(compiler, instruction); - break; -- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -- spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); -- break; - case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: - spirv_compiler_emit_tessellator_output_primitive(compiler, - instruction->declaration.tessellator_output_primitive); -@@ -10183,9 +10243,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler 
*compiler, - spirv_compiler_emit_tessellator_partitioning(compiler, - instruction->declaration.tessellator_partitioning); - break; -- case VKD3DSIH_DCL_THREAD_GROUP: -- spirv_compiler_emit_dcl_thread_group(compiler, instruction); -- break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -@@ -10506,7 +10563,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_RAW: - case VKD3DSIH_DCL_UAV_STRUCTURED: -- case VKD3DSIH_DCL_UAV_TYPED: - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_NOP: - /* nothing to do */ -@@ -10543,6 +10599,23 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - else - spirv_compiler_emit_input(compiler, VKD3DSPR_PATCHCONST, i); - } -+ -+ if (compiler->program->has_point_size) -+ { -+ struct vkd3d_shader_dst_param dst; -+ -+ vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ spirv_compiler_emit_output_register(compiler, &dst); -+ } -+ -+ if (compiler->program->has_point_coord) -+ { -+ struct vkd3d_shader_dst_param dst; -+ -+ vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); -+ spirv_compiler_emit_input_register(compiler, &dst); -+ } - } - - static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) -@@ -10564,23 +10637,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c - switch (descriptor->type) - { - case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: -- spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); -+ spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor); - break; - - case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -- spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); -+ spirv_compiler_emit_cbv_declaration(compiler, &range, 
descriptor); - break; - - case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: -- spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, -- descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, -- descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); -- break; -- - case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -- spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, -- descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, -- descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); -+ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor); - break; - - default: -@@ -10600,10 +10666,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - enum vkd3d_result result = VKD3D_OK; - unsigned int i, max_element_count; - -- if ((result = vsir_program_normalise(program, compiler->config_flags, -+ if ((result = vsir_program_transform(program, compiler->config_flags, - compile_info, compiler->message_context)) < 0) - return result; - -+ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); -+ - max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); - if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) - return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -10612,6 +10680,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - spirv_compiler_emit_temps(compiler, program->temp_count); - if (program->ssa_count) - spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); -+ if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) -+ spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); -+ spirv_compiler_emit_global_flags(compiler, program->global_flags); - - 
spirv_compiler_emit_descriptor_declarations(compiler); - -@@ -10624,7 +10695,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - { - uint32_t type_id, struct_id, ptr_type_id, var_id; - -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -+ type_id = vkd3d_spirv_get_type_id(builder, -+ vkd3d_component_type_from_data_type(parameter_data_type_map[parameter->data_type].type), -+ parameter_data_type_map[parameter->data_type].component_count); - - struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); - vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); -@@ -10663,6 +10736,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - compiler->input_control_point_count = program->input_control_point_count; - compiler->output_control_point_count = program->output_control_point_count; - -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) -+ spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); -+ - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 84f641cc316..fcfe074e61e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -23,6 +23,7 @@ - - #include "hlsl.h" - #include "vkd3d_shader_private.h" -+#include "d3dcommon.h" - - #define SM4_MAX_SRC_COUNT 6 - #define SM4_MAX_DST_COUNT 2 -@@ -616,6 +617,47 @@ enum vkd3d_sm4_shader_data_type - VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, - }; - -+enum vkd3d_sm4_stat_field -+{ -+ VKD3D_STAT_UNUSED = 0, -+ VKD3D_STAT_INSTR_COUNT, -+ VKD3D_STAT_MOV, -+ VKD3D_STAT_MOVC, -+ VKD3D_STAT_CONV, -+ VKD3D_STAT_FLOAT, -+ VKD3D_STAT_INT, -+ VKD3D_STAT_UINT, -+ VKD3D_STAT_EMIT, -+ VKD3D_STAT_CUT, -+ VKD3D_STAT_SAMPLE, -+ VKD3D_STAT_SAMPLE_C, -+ 
VKD3D_STAT_SAMPLE_GRAD, -+ VKD3D_STAT_SAMPLE_BIAS, -+ VKD3D_STAT_LOAD, -+ VKD3D_STAT_STORE, -+ VKD3D_STAT_DCL_VERTICES_OUT, -+ VKD3D_STAT_DCL_INPUT_PRIMITIVE, -+ VKD3D_STAT_DCL_OUTPUT_TOPOLOGY, -+ VKD3D_STAT_DCL_GS_INSTANCES, -+ VKD3D_STAT_BITWISE, -+ VKD3D_STAT_ATOMIC, -+ VKD3D_STAT_TESS_DOMAIN, -+ VKD3D_STAT_TESS_PARTITIONING, -+ VKD3D_STAT_TESS_OUTPUT_PRIMITIVE, -+ VKD3D_STAT_TESS_CONTROL_POINT_COUNT, -+ VKD3D_STAT_BARRIER, -+ VKD3D_STAT_LOD, -+ VKD3D_STAT_GATHER, -+ VKD3D_STAT_TEMPS, -+ VKD3D_STAT_COUNT, -+}; -+ -+struct vkd3d_sm4_stat_field_info -+{ -+ enum vkd3d_sm4_opcode opcode; -+ enum vkd3d_sm4_stat_field field; -+}; -+ - struct sm4_index_range - { - unsigned int index; -@@ -632,8 +674,10 @@ struct sm4_index_range_array - struct vkd3d_sm4_lookup_tables - { - const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; -+ const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT]; - const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; - const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; -+ const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; - }; - - struct vkd3d_shader_sm4_parser -@@ -853,7 +897,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u - >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - -- reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; -+ reg_data_type = VKD3D_DATA_UNUSED; - shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); - -@@ -873,7 +917,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u - } - } - -- if (reg_data_type == VKD3D_DATA_UAV) -+ if (opcode != VKD3D_SM4_OP_DCL_RESOURCE) - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - - shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -@@ -915,7 +959,7 @@ static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, ui - ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; - if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) - FIXME("Unhandled sampler mode %#x.\n", ins->flags); -- shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); -+ shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &ins->declaration.sampler.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); - } -@@ -1115,7 +1159,18 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u - struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - -- e->interpolation_mode = ins->flags; -+ if (!e) -+ { -+ WARN("No matching signature element for input register %u with mask %#x.\n", -+ dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL, -+ "No matching signature element for input register %u with mask %#x.\n", -+ 
dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ } -+ else -+ { -+ e->interpolation_mode = ins->flags; -+ } - } - } - -@@ -1130,7 +1185,18 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in - struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - -- e->interpolation_mode = ins->flags; -+ if (!e) -+ { -+ WARN("No matching signature element for input register %u with mask %#x.\n", -+ dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL, -+ "No matching signature element for input register %u with mask %#x.\n", -+ dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ } -+ else -+ { -+ e->interpolation_mode = ins->flags; -+ } - } - ins->declaration.register_semantic.sysval_semantic = *tokens; - } -@@ -1147,9 +1213,10 @@ static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction * - } - - static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { - ins->declaration.global_flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -+ sm4->p.program->global_flags = ins->declaration.global_flags; - } - - static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -@@ -1201,6 +1268,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi - { - ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -+ priv->p.program->tess_domain = 
ins->declaration.tessellator_domain; - } - - static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1224,11 +1292,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio - } - - static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { -+ struct vsir_program *program = sm4->p.program; -+ - ins->declaration.thread_group_size.x = *tokens++; - ins->declaration.thread_group_size.y = *tokens++; - ins->declaration.thread_group_size.z = *tokens++; -+ program->thread_group_size = ins->declaration.thread_group_size; - } - - static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -@@ -1237,7 +1308,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -@@ -1249,7 +1320,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction * - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - -- shader_sm4_read_dst_param(priv, &tokens, end, 
VKD3D_DATA_UAV, &resource->resource.reg); -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - resource->byte_stride = *tokens++; -@@ -1286,7 +1357,7 @@ static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) -@@ -1300,7 +1371,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); - } -@@ -1330,11 +1401,23 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, - }; - --struct tpf_writer -+struct sm4_stat -+{ -+ uint32_t fields[VKD3D_STAT_COUNT]; -+}; -+ -+struct tpf_compiler - { -+ /* OBJECTIVE: We want to get rid of this HLSL IR specific field. 
*/ - struct hlsl_ctx *ctx; -- struct vkd3d_bytecode_buffer *buffer; -+ struct vsir_program *program; - struct vkd3d_sm4_lookup_tables lookup; -+ struct sm4_stat *stat; -+ -+ int result; -+ -+ struct vkd3d_bytecode_buffer *buffer; -+ struct dxbc_writer dxbc; - }; - - static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -@@ -1400,8 +1483,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, - {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, - {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, -- {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, -- {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, -+ {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "i*"}, -+ {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "i*i"}, - {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, - {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, - {VKD3D_SM4_OP_LT, VKD3DSIH_LTO, "u", "ff"}, -@@ -1417,7 +1500,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, - {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, - {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, -- {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, -+ {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, - {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, - {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, -@@ -1426,12 +1509,12 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, - {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, -- {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, -- {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, -- {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, -- {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, -- {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, -- {VKD3D_SM4_OP_SAMPLE_B, 
VKD3DSIH_SAMPLE_B, "u", "fRSf"}, -+ {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "f**"}, -+ {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "f**f"}, -+ {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "f**f"}, -+ {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "f**f"}, -+ {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "f**ff"}, -+ {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "f**f"}, - {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, - {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, - {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, -@@ -1480,10 +1563,10 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - shader_sm4_read_dcl_indexable_temp}, - {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", - shader_sm4_read_dcl_global_flags}, -- {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, -- {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, -- {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, -- {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, -+ {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "f**"}, -+ {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "f**"}, -+ {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "*u"}, -+ {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "*"}, - {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, - {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, -@@ -1492,14 +1575,14 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, - {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", - shader_sm5_read_fcall}, -- {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, -+ {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "*"}, - {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", 
"f"}, - {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, -- {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, -- {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, -- {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, -+ {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "f**f"}, -+ {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fi**"}, -+ {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fi**f"}, - {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, - {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, - {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, -@@ -1551,33 +1634,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - shader_sm5_read_dcl_resource_raw}, - {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", - shader_sm5_read_dcl_resource_structured}, -- {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, -- {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, -- {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, -- {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, -- {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, -- {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, -- {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, -- {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, -- {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, -- {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, -- {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", 
"U"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, -+ {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "i*"}, -+ {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "*", "iu"}, -+ {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "i*"}, -+ {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "*", "uu"}, -+ {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "ii*"}, -+ {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "*", "iiu"}, -+ {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "*", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "*", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "*", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "*", "iuu"}, -+ {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "*", "ii"}, -+ {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "*", "ii"}, -+ {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "*", "ii"}, -+ {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "*", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "*", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "*"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "*"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "u*", "ii"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "u*", 
"iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "u*", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "u*", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "u*", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "u*", "iuu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "i*", "ii"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "i*", "ii"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "u*", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "u*", "iu"}, - {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", - shader_sm5_read_sync}, - {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, -@@ -1604,21 +1687,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, - {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, - {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, -- {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, -- {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, -- {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, -- {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, -- {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, -- {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, -+ {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "f**"}, -+ {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "f**f"}, -+ {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fi**"}, -+ {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fi**f"}, -+ {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "i*"}, -+ {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "i*i"}, - {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, -- {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, -- {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, -- 
{VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, -- {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, -- {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, -- {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, -- {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, -+ {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "ii*"}, -+ {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "f**f"}, -+ {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "f**f"}, -+ {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "f**f"}, -+ {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "f**ff"}, -+ {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "f**fff"}, -+ {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "f**ff"}, - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, - }; - -@@ -1637,7 +1720,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID, VKD3D_SM4_SWIZZLE_NONE}, - {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL, VKD3D_SM4_SWIZZLE_INVALID}, -- {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_VEC4}, -+ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_SCALAR}, - {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY, VKD3D_SM4_SWIZZLE_VEC4}, -@@ -1662,6 +1745,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, - }; - -+ static const struct vkd3d_sm4_stat_field_info stat_field_table[] = -+ { -+ {VKD3D_SM4_OP_MOV, VKD3D_STAT_MOV}, -+ {VKD3D_SM4_OP_MOVC, VKD3D_STAT_MOVC}, -+ {VKD3D_SM5_OP_DMOV, 
VKD3D_STAT_MOV}, -+ {VKD3D_SM5_OP_DMOVC, VKD3D_STAT_MOVC}, -+ -+ {VKD3D_SM4_OP_ITOF, VKD3D_STAT_CONV}, -+ {VKD3D_SM4_OP_FTOI, VKD3D_STAT_CONV}, -+ {VKD3D_SM4_OP_FTOU, VKD3D_STAT_CONV}, -+ {VKD3D_SM4_OP_UTOF, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_DTOU, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_UTOD, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_DTOF, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_FTOD, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_DTOI, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_ITOD, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_F32TOF16, VKD3D_STAT_CONV}, -+ {VKD3D_SM5_OP_F16TOF32, VKD3D_STAT_CONV}, -+ -+ {VKD3D_SM4_OP_ADD, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_DIV, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_DP2, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_DP3, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_DP4, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_EQ, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_EXP, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_FRC, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_GE, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_LT, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_MAD, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_MIN, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_MAX, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_MUL, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_NE, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_ROUND_NE, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_ROUND_NI, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_ROUND_PI, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_ROUND_Z, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_RSQ, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_SQRT, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM4_OP_SINCOS, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_RCP, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DADD, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DMAX, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DMIN, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DMUL, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DEQ, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DGE, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DLT, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DNE, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DDIV, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DFMA, VKD3D_STAT_FLOAT}, -+ {VKD3D_SM5_OP_DRCP, VKD3D_STAT_FLOAT}, -+ -+ 
{VKD3D_SM4_OP_IADD, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_IEQ, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_IGE, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_ILT, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_IMAD, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_IMAX, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_IMIN, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_IMUL, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_INE, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_INEG, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_ISHL, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_ISHR, VKD3D_STAT_INT}, -+ {VKD3D_SM4_OP_ITOF, VKD3D_STAT_INT}, -+ -+ {VKD3D_SM4_OP_UDIV, VKD3D_STAT_UINT}, -+ {VKD3D_SM4_OP_ULT, VKD3D_STAT_UINT}, -+ {VKD3D_SM4_OP_UGE, VKD3D_STAT_UINT}, -+ {VKD3D_SM4_OP_UMUL, VKD3D_STAT_UINT}, -+ {VKD3D_SM4_OP_UMAX, VKD3D_STAT_UINT}, -+ {VKD3D_SM4_OP_UMIN, VKD3D_STAT_UINT}, -+ {VKD3D_SM4_OP_USHR, VKD3D_STAT_UINT}, -+ -+ {VKD3D_SM4_OP_EMIT, VKD3D_STAT_EMIT}, -+ {VKD3D_SM4_OP_CUT, VKD3D_STAT_CUT}, -+ {VKD3D_SM5_OP_EMIT_STREAM, VKD3D_STAT_EMIT}, -+ {VKD3D_SM5_OP_CUT_STREAM, VKD3D_STAT_CUT}, -+ -+ {VKD3D_SM4_OP_SAMPLE, VKD3D_STAT_SAMPLE}, -+ {VKD3D_SM4_OP_SAMPLE_LOD, VKD3D_STAT_SAMPLE}, -+ {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3D_STAT_SAMPLE}, -+ {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3D_STAT_SAMPLE}, -+ {VKD3D_SM4_OP_SAMPLE_C, VKD3D_STAT_SAMPLE_C}, -+ {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3D_STAT_SAMPLE_C}, -+ {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3D_STAT_SAMPLE_C}, -+ {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3D_STAT_SAMPLE_C}, -+ {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3D_STAT_SAMPLE_GRAD}, -+ {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3D_STAT_SAMPLE_GRAD}, -+ {VKD3D_SM4_OP_SAMPLE_B, VKD3D_STAT_SAMPLE_BIAS}, -+ {VKD3D_SM4_OP_GATHER4, VKD3D_STAT_GATHER}, -+ {VKD3D_SM5_OP_GATHER4_PO, VKD3D_STAT_GATHER}, -+ {VKD3D_SM4_OP_LOD, VKD3D_STAT_LOD}, -+ -+ {VKD3D_SM4_OP_LD, VKD3D_STAT_LOAD}, -+ {VKD3D_SM4_OP_LD2DMS, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD_RAW, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD_STRUCTURED, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD_S, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD2DMS_S, VKD3D_STAT_LOAD}, 
-+ {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD_RAW_S, VKD3D_STAT_LOAD}, -+ {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3D_STAT_LOAD}, -+ -+ {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3D_STAT_STORE}, -+ {VKD3D_SM5_OP_STORE_RAW, VKD3D_STAT_STORE}, -+ {VKD3D_SM5_OP_STORE_STRUCTURED,VKD3D_STAT_STORE}, -+ -+ {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3D_STAT_DCL_VERTICES_OUT}, -+ {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3D_STAT_DCL_INPUT_PRIMITIVE}, -+ {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3D_STAT_DCL_OUTPUT_TOPOLOGY}, -+ {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3D_STAT_DCL_GS_INSTANCES}, -+ -+ {VKD3D_SM4_OP_AND, VKD3D_STAT_BITWISE}, -+ {VKD3D_SM4_OP_NOT, VKD3D_STAT_BITWISE}, -+ {VKD3D_SM4_OP_OR, VKD3D_STAT_BITWISE}, -+ {VKD3D_SM4_OP_XOR, VKD3D_STAT_BITWISE}, -+ -+ {VKD3D_SM5_OP_ATOMIC_AND, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_OR, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_XOR, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_IADD, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3D_STAT_ATOMIC}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3D_STAT_ATOMIC}, -+ -+ {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3D_STAT_TESS_DOMAIN}, -+ {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, 
VKD3D_STAT_TESS_PARTITIONING}, -+ {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3D_STAT_TESS_OUTPUT_PRIMITIVE}, -+ {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3D_STAT_TESS_CONTROL_POINT_COUNT}, -+ {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3D_STAT_TESS_CONTROL_POINT_COUNT}, -+ -+ {VKD3D_SM5_OP_SYNC, VKD3D_STAT_BARRIER}, -+ -+ {VKD3D_SM4_OP_DCL_TEMPS, VKD3D_STAT_TEMPS}, -+ }; -+ - memset(lookup, 0, sizeof(*lookup)); - - for (i = 0; i < ARRAY_SIZE(opcode_table); ++i) -@@ -1669,6 +1907,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - const struct vkd3d_sm4_opcode_info *info = &opcode_table[i]; - - lookup->opcode_info_from_sm4[info->opcode] = info; -+ lookup->opcode_info_from_vsir[info->handler_idx] = info; - } - - for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) -@@ -1678,13 +1917,13 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - lookup->register_type_info_from_sm4[info->sm4_type] = info; - lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; - } --} - --static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) --{ -- tpf->ctx = ctx; -- tpf->buffer = buffer; -- init_sm4_lookup_tables(&tpf->lookup); -+ for (i = 0; i < ARRAY_SIZE(stat_field_table); ++i) -+ { -+ const struct vkd3d_sm4_stat_field_info *info = &stat_field_table[i]; -+ -+ lookup->stat_field_from_sm4[info->opcode] = info; -+ } - } - - static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( -@@ -1695,6 +1934,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( - return lookup->opcode_info_from_sm4[sm4_opcode]; - } - -+static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode) -+{ -+ if (vsir_opcode >= VKD3DSIH_COUNT) -+ return NULL; -+ return lookup->opcode_info_from_vsir[vsir_opcode]; -+} -+ -+static unsigned int 
opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info) -+{ -+ return strnlen(info->dst_info, SM4_MAX_DST_COUNT); -+} -+ -+static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info) -+{ -+ return strnlen(info->src_info, SM4_MAX_SRC_COUNT); -+} -+ - static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) - { -@@ -1721,6 +1978,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( - return register_type_info->default_src_swizzle_type; - } - -+static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) -+{ -+ const struct vkd3d_sm4_stat_field_info *field_info; -+ -+ if (sm4_opcode >= VKD3D_SM4_OP_COUNT || !(field_info = lookup->stat_field_from_sm4[sm4_opcode])) -+ return VKD3D_STAT_UNUSED; -+ return field_info->field; -+} -+ - static enum vkd3d_data_type map_data_type(char t) - { - switch (t) -@@ -1735,12 +2002,8 @@ static enum vkd3d_data_type map_data_type(char t) - return VKD3D_DATA_UINT; - case 'O': - return VKD3D_DATA_OPAQUE; -- case 'R': -- return VKD3D_DATA_RESOURCE; -- case 'S': -- return VKD3D_DATA_SAMPLER; -- case 'U': -- return VKD3D_DATA_UAV; -+ case '*': -+ return VKD3D_DATA_UNUSED; - default: - ERR("Invalid data type '%c'.\n", t); - return VKD3D_DATA_FLOAT; -@@ -1973,7 +2236,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui - return true; - } - --static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) - { - switch (reg->type) - { -@@ -2411,8 +2674,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - ins->raw = false; - ins->structured = false; - ins->predicate = NULL; -- ins->dst_count = strnlen(opcode_info->dst_info, 
SM4_MAX_DST_COUNT); -- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); -+ ins->dst_count = opcode_info_get_dst_count(opcode_info); -+ ins->src_count = opcode_info_get_src_count(opcode_info); - ins->src = src_params = vsir_program_get_src_params(program, ins->src_count); - if (!src_params && ins->src_count) - { -@@ -2553,7 +2816,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. */ -- if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) -+ if (!vsir_program_init(program, compile_info, -+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - return false; - vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); - sm4->ptr = sm4->start; -@@ -2670,6 +2934,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) - uninvert_used_masks(&program->patch_constant_signature); - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ break; -+ -+ default: -+ if (program->patch_constant_signature.element_count != 0) -+ { -+ WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); -+ shader_signature_cleanup(&program->patch_constant_signature); -+ } -+ break; -+ } -+ - if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, - sm4.input_register_masks, "Input") - || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, -@@ -2706,12 +2985,8 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - && !sm4.has_control_point_phase && !sm4.p.failed) - shader_sm4_validate_default_phase_index_ranges(&sm4); - -- if (!sm4.p.failed) -- vkd3d_shader_parser_validate(&sm4.p, 
config_flags); -- - if (sm4.p.failed) - { -- WARN("Failed to parse shader.\n"); - vsir_program_cleanup(program); - return VKD3D_ERROR_INVALID_SHADER; - } -@@ -2719,24 +2994,10 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - return VKD3D_OK; - } - --static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); -- --static bool type_is_integer(const struct hlsl_type *type) --{ -- switch (type->e.numeric.type) -- { -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- return true; -- -- default: -- return false; -- } --} -+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); - --bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, enum vkd3d_shader_register_type *type, bool *has_idx) -+bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, -+ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) - { - unsigned int i; - -@@ -2750,12 +3011,19 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - } - register_table[] = - { -- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, -- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, -- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, -+ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADINDEX, false}, -+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, -+ -+ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, - - 
{"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, - -+ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, -+ - /* Put sv_target in this table, instead of letting it fall through to - * default varying allocation, so that the register index matches the - * usage index. */ -@@ -2768,9 +3036,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) - && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type) -+ && version->type == register_table[i].shader_type) - { - if (type) - *type = register_table[i].type; -@@ -2782,8 +3050,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - return false; - } - --bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3D_NAME *usage) -+static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, -+ enum vkd3d_tessellator_domain domain, uint32_t index) -+{ -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ if (index == 0) -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; -+ else if (index == 1) -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; -+ else -+ return false; -+ return true; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ return index < 3; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; -+ return index < 4; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, -+ enum vkd3d_tessellator_domain domain, 
uint32_t index) -+{ -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ return false; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; -+ return index == 0; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; -+ return index < 2; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, -+ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, -+ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) - { - unsigned int i; - -@@ -2792,54 +3109,104 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - const char *name; - bool output; - enum vkd3d_shader_type shader_type; -- D3D_NAME usage; -+ enum vkd3d_shader_sysval_semantic semantic; - } - semantics[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -+ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - -- {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, -- -- {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, -- -- {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, -- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_PRIMITIVE_ID}, -- {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, -- 
{"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, -- {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_VIEWPORT_ARRAY_INDEX}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -- {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, -- -- {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, -- {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, -- {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_INSTANCE_ID}, -- -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, -- {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, -- {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VIEWPORT_ARRAY_INDEX}, -+ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, -+ -+ {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, -+ -+ {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, -+ -+ {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, -+ {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, -+ -+ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, -+ 
{"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, -+ -+ {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, -+ -+ {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, -+ {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, -+ {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, -+ {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, -+ {"sv_sampleindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_SAMPLE_INDEX}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, -+ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_COVERAGE}, -+ -+ {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_NONE}, -+ {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VERTEX_ID}, -+ {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_INSTANCE_ID}, -+ -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, -+ {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, -+ {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, - }; -- bool needs_compat_mapping = ascii_strncasecmp(semantic->name, "sv_", 3); -+ bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3); -+ -+ if (is_patch_constant_func) -+ { -+ if (output) -+ { -+ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) -+ 
return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) -+ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ if (!ascii_strcasecmp(semantic_name, "sv_position")) -+ { -+ *sysval_semantic = VKD3D_SHADER_SV_NONE; -+ return true; -+ } -+ } -+ else -+ { -+ if (!ascii_strcasecmp(semantic_name, "sv_primitiveid") -+ || !ascii_strcasecmp(semantic_name, "sv_position")) -+ { -+ *sysval_semantic = ~0u; -+ return true; -+ } -+ return false; -+ } -+ } -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ { -+ if (!output) -+ { -+ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) -+ return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) -+ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ } -+ } - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { -- if (!ascii_strcasecmp(semantic->name, semantics[i].name) -+ if (!ascii_strcasecmp(semantic_name, semantics[i].name) - && output == semantics[i].output -- && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) -- && ctx->profile->type == semantics[i].shader_type) -+ && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) -+ && version->type == semantics[i].shader_type) - { -- *usage = semantics[i].usage; -+ *sysval_semantic = semantics[i].semantic; - return true; - } - } -@@ -2847,7 +3214,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - if (!needs_compat_mapping) - return false; - -- *usage = D3D_NAME_UNDEFINED; -+ *sysval_semantic = VKD3D_SHADER_SV_NONE; - return true; - } - -@@ -2865,110 +3232,66 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - ctx->result = buffer->status; - } - --static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool 
output) -+static int signature_element_pointer_compare(const void *x, const void *y) -+{ -+ const struct signature_element *e = *(const struct signature_element **)x; -+ const struct signature_element *f = *(const struct signature_element **)y; -+ int ret; -+ -+ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) -+ return ret; -+ return vkd3d_u32_compare(e->mask, f->mask); -+} -+ -+static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) - { -+ bool output = tag == TAG_OSGN || (tag == TAG_PCSG -+ && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ const struct signature_element **sorted_elements; - struct vkd3d_bytecode_buffer buffer = {0}; -- struct vkd3d_string_buffer *string; -- const struct hlsl_ir_var *var; -- size_t count_position; - unsigned int i; -- bool ret; - -- count_position = put_u32(&buffer, 0); -+ put_u32(&buffer, signature->element_count); - put_u32(&buffer, 8); /* unknown */ - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -- uint32_t usage_idx, reg_idx; -- D3D_NAME usage; -- bool has_idx; -- -- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -- continue; -- -- ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- VKD3D_ASSERT(ret); -- if (usage == ~0u) -- continue; -- usage_idx = var->semantic.index; -+ if (!(sorted_elements = vkd3d_calloc(signature->element_count, sizeof(*sorted_elements)))) -+ return; -+ for (i = 0; i < signature->element_count; ++i) -+ sorted_elements[i] = &signature->elements[i]; -+ qsort(sorted_elements, signature->element_count, sizeof(*sorted_elements), signature_element_pointer_compare); - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) -- { -- reg_idx = has_idx ? 
var->semantic.index : ~0u; -- } -- else -- { -- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -- reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; -- } -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ const struct signature_element *element = sorted_elements[i]; -+ enum vkd3d_shader_sysval_semantic sysval; -+ uint32_t used_mask = element->used_mask; - -- use_mask = width; /* FIXME: accurately report use mask */ - if (output) -- use_mask = 0xf ^ use_mask; -+ used_mask = 0xf ^ used_mask; - -- /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ -- if (usage >= 64) -- usage = 0; -+ sysval = element->sysval_semantic; -+ if (sysval >= VKD3D_SHADER_SV_TARGET) -+ sysval = VKD3D_SHADER_SV_NONE; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, usage_idx); -- put_u32(&buffer, usage); -- switch (var->data_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); -- break; -- -- case HLSL_TYPE_INT: -- put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); -- break; -- -- default: -- if ((string = hlsl_type_to_string(ctx, var->data_type))) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid data type %s for semantic variable %s.", string->buffer, var->name); -- hlsl_release_string_buffer(ctx, string); -- put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); -- } -- put_u32(&buffer, reg_idx); -- put_u32(&buffer, vkd3d_make_u16(width, use_mask)); -+ put_u32(&buffer, element->semantic_index); -+ put_u32(&buffer, sysval); -+ put_u32(&buffer, element->component_type); -+ put_u32(&buffer, element->register_index); -+ put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask)); - } - -- i = 0; -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ for (i = 0; i < signature->element_count; ++i) - { -- const char *semantic = 
var->semantic.name; -+ const struct signature_element *element = sorted_elements[i]; - size_t string_offset; -- D3D_NAME usage; -- -- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -- continue; - -- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- if (usage == ~0u) -- continue; -- -- if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) -- string_offset = put_string(&buffer, "SV_Target"); -- else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) -- string_offset = put_string(&buffer, "SV_Depth"); -- else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) -- string_offset = put_string(&buffer, "SV_Position"); -- else -- string_offset = put_string(&buffer, semantic); -- set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); -+ string_offset = put_string(&buffer, element->semantic_name); -+ set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); - } - -- set_u32(&buffer, count_position, i); -- -- add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); -+ add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); -+ vkd3d_free(sorted_elements); - } - - static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -@@ -2990,6 +3313,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_ERROR: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -@@ -3008,6 +3332,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -3123,24 +3448,30 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) - vkd3d_unreachable(); - } - --static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -+static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) - { -- switch (type->e.resource.format->e.numeric.type) -+ const struct hlsl_type *format = type->e.resource.format; -+ -+ switch (format->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: -- return D3D_RETURN_TYPE_DOUBLE; -+ return VKD3D_SM4_DATA_DOUBLE; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- return D3D_RETURN_TYPE_FLOAT; -+ if (format->modifiers & HLSL_MODIFIER_UNORM) -+ return VKD3D_SM4_DATA_UNORM; -+ if (format->modifiers & HLSL_MODIFIER_SNORM) -+ return VKD3D_SM4_DATA_SNORM; -+ return VKD3D_SM4_DATA_FLOAT; - - case HLSL_TYPE_INT: -- return D3D_RETURN_TYPE_SINT; -+ return VKD3D_SM4_DATA_INT; - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: -- return D3D_RETURN_TYPE_UINT; -+ return VKD3D_SM4_DATA_UINT; - - default: - vkd3d_unreachable(); -@@ -3170,6 +3501,7 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - case HLSL_SAMPLER_DIM_CUBEARRAY: - return 
D3D_SRV_DIMENSION_TEXTURECUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return D3D_SRV_DIMENSION_BUFFER; - default: -@@ -3398,6 +3730,48 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - return extern_resources; - } - -+/* For some reason, for matrices, values from default value initializers end up in different -+ * components than from regular initializers. Default value initializers fill the matrix in -+ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -+ * (top-to-bottom left-to-right), so they have to be adjusted. -+ * An exception is that the order of matrix initializers for function parameters are row-major -+ * (top-to-bottom left-to-right). */ -+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) -+{ -+ unsigned int element_comp_count, element, x, y, i; -+ unsigned int base = 0; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_MATRIX: -+ x = index / type->dimy; -+ y = index % type->dimy; -+ return y * type->dimx + x; -+ -+ case HLSL_CLASS_ARRAY: -+ element_comp_count = hlsl_type_component_count(type->e.array.type); -+ element = index / element_comp_count; -+ base = element * element_comp_count; -+ return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ struct hlsl_type *field_type = type->e.record.fields[i].type; -+ -+ element_comp_count = hlsl_type_component_count(field_type); -+ if (index - base < element_comp_count) -+ return base + get_component_index_from_default_initializer_index(field_type, index - base); -+ base += element_comp_count; -+ } -+ break; -+ -+ default: -+ return index; -+ } -+ vkd3d_unreachable(); -+} -+ - static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { - 
uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); -@@ -3471,7 +3845,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { - unsigned int dimx = resource->component_type->e.resource.format->dimx; - -- put_u32(&buffer, sm4_resource_format(resource->component_type)); -+ put_u32(&buffer, sm4_data_type(resource->component_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -@@ -3552,7 +3926,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { - uint32_t flags = 0; - -- if (var->last_read) -+ if (var->is_read) - flags |= D3D_SVF_USED; - - put_u32(&buffer, 0); /* name */ -@@ -3598,7 +3972,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - for (k = 0; k < comp_count; ++k) - { - struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -- unsigned int comp_offset; -+ unsigned int comp_offset, comp_index; - enum hlsl_regset regset; - - if (comp_type->class == HLSL_CLASS_STRING) -@@ -3608,7 +3982,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - continue; - } - -- comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ comp_index = get_component_index_from_default_initializer_index(var->data_type, k); -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set); - if (regset == HLSL_REGSET_NUMERIC) - { - if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -@@ -3655,6 +4030,7 @@ static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_typ - case HLSL_SAMPLER_DIM_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return VKD3D_SM4_RESOURCE_BUFFER; - default: 
-@@ -3779,11 +4155,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s - *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); - } - --static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, -+static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) - { -- const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; -+ const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); - const struct hlsl_ir_var *var = deref->var; -+ struct hlsl_ctx *ctx = tpf->ctx; - - if (var->is_uniform) - { -@@ -3793,7 +4171,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = VSIR_DIMENSION_VEC4; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -@@ -3812,7 +4190,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -@@ -3831,7 +4209,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[1].offset = 
var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -@@ -3853,7 +4231,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->buffer->reg.id; - reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -@@ -3873,7 +4251,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - bool has_idx; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, &has_idx)) -+ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -@@ -3883,7 +4261,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx_count = 1; - } - -- reg->dimension = VSIR_DIMENSION_VEC4; -+ if (shader_sm4_is_scalar_register(reg)) -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ else -+ reg->dimension = VSIR_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else -@@ -3891,7 +4272,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - VKD3D_ASSERT(hlsl_reg.allocated); -- reg->type = VKD3DSPR_INPUT; -+ -+ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ reg->type = VKD3DSPR_PATCHCONST; -+ else -+ reg->type = VKD3DSPR_INPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; -@@ -3902,7 +4287,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - bool has_idx; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, &has_idx)) -+ if (sm4_register_from_semantic_name(version, var->semantic.name, 
true, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -@@ -3912,7 +4297,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx_count = 1; - } - -- if (reg->type == VKD3DSPR_DEPTHOUT) -+ if (shader_sm4_is_scalar_register(reg)) - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; -@@ -3938,13 +4323,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - } - } - --static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, -+static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) - { - unsigned int hlsl_swizzle; - uint32_t writemask; - -- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); -+ sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); - if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) - { - hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -@@ -3982,7 +4367,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, - } - } - --static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, -+static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_ir_node *instr, uint32_t map_writemask) - { - unsigned int hlsl_swizzle; -@@ -4018,7 +4403,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ - return 0; - } - --static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, -+static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, - enum vkd3d_sm4_swizzle_type 
sm4_swizzle_type, uint32_t sm4_swizzle) - { - const struct vkd3d_sm4_register_type_info *register_type_info; -@@ -4078,7 +4463,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v - return token; - } - --static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, -+static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, - unsigned int j) - { - unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); -@@ -4108,7 +4493,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct - } - } - --static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) -+static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) - { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = 0; -@@ -4121,7 +4506,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk - sm4_write_register_index(tpf, &dst->reg, j); - } - --static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) -+static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) - { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = 0, mod_token = 0; -@@ -4182,10 +4567,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk - } - } - --static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) -+static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) -+{ -+ enum vkd3d_shader_type shader_type = tpf->program->shader_version.type; -+ enum vkd3d_sm4_stat_field stat_field; -+ uint32_t opcode; -+ -+ ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT]; -+ -+ opcode = instr->opcode & 
VKD3D_SM4_OPCODE_MASK; -+ stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, opcode); -+ -+ switch (opcode) -+ { -+ case VKD3D_SM4_OP_DCL_TEMPS: -+ tpf->stat->fields[stat_field] = max(tpf->stat->fields[stat_field], instr->idx[0]); -+ break; -+ case VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY: -+ case VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE: -+ tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM4_PRIMITIVE_TYPE_MASK) -+ >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -+ break; -+ case VKD3D_SM4_OP_DCL_VERTICES_OUT: -+ case VKD3D_SM5_OP_DCL_GS_INSTANCES: -+ tpf->stat->fields[stat_field] = instr->idx[0]; -+ break; -+ case VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN: -+ case VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING: -+ case VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -+ tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; -+ break; -+ case VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT: -+ case VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT: -+ if ((shader_type == VKD3D_SHADER_TYPE_HULL && opcode == VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT) -+ || (shader_type == VKD3D_SHADER_TYPE_DOMAIN -+ && opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT)) -+ { -+ tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) -+ >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; -+ } -+ break; -+ default: -+ ++tpf->stat->fields[stat_field]; -+ } -+} -+ -+static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) - { -- struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = instr->opcode | instr->extra_bits; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - unsigned int size, i, j; - size_t token_position; - -@@ -4218,6 +4648,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 - size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t); - token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - set_u32(buffer, token_position, 
token); -+ -+ sm4_update_stat_counters(tpf, instr); - } - - static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, -@@ -4247,7 +4679,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - return true; - } - --static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) -+static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) - { - size_t size = (cbuffer->used_size + 3) / 4; - -@@ -4282,7 +4714,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) -+static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) - { - unsigned int i; - struct sm4_instruction instr = -@@ -4323,9 +4755,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - } - } - --static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, -+static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, - bool uav) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - struct hlsl_type *component_type; - struct sm4_instruction instr; -@@ -4348,21 +4781,21 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - -- .idx[0] = sm4_resource_format(component_type) * 0x1111, -+ .idx[0] = sm4_data_type(component_type) * 0x1111, - .idx_count = 1, - }; - - multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; - -- if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) -+ if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) - { - hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Multisampled texture object declaration needs sample count for profile %s.", -- tpf->ctx->profile->name); -+ "Multisampled texture object declaration needs sample count for profile %u.%u.", -+ version->major, version->minor); - } - -- if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - VKD3D_ASSERT(!i); - instr.dsts[0].reg.idx[0].offset = resource->id; -@@ -4387,6 +4820,9 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; - instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; - break; -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW; -+ break; - default: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; - break; -@@ -4397,7 +4833,15 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - } - else - { -- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; -+ switch (component_type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; -+ break; -+ default: -+ instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; -+ break; -+ } - } - 
instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); - -@@ -4408,305 +4852,189 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - } - } - --static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) -+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) - { -- const struct hlsl_profile_info *profile = tpf->ctx->profile; -- const bool output = var->is_output_semantic; -- D3D_NAME usage; -- bool has_idx; -- - struct sm4_instruction instr = - { -- .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4, -- .dst_count = 1, -+ .opcode = VKD3D_SM4_OP_DCL_TEMPS, -+ -+ .idx = {count}, -+ .idx_count = 1, - }; - -- if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) -- { -- if (has_idx) -- { -- instr.dsts[0].reg.idx[0].offset = var->semantic.index; -- instr.dsts[0].reg.idx_count = 1; -- } -- else -- { -- instr.dsts[0].reg.idx_count = 0; -- } -- instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1; -- } -- else -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) -+{ -+ struct sm4_instruction instr = - { -- instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -- instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; -- instr.dsts[0].reg.idx_count = 1; -- instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; -- } -+ .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, - -- if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) -- instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; -+ .idx = {temp->register_idx, temp->register_size, temp->component_count}, -+ .idx_count = 3, -+ }; - -- hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); -- if (usage == ~0u) -- usage = D3D_NAME_UNDEFINED; -+ write_sm4_instruction(tpf, &instr); -+} - -- if (var->is_input_semantic) -+static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags) -+{ -+ struct sm4_instruction instr = - { -- switch (usage) -- { -- case D3D_NAME_UNDEFINED: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; -- break; -+ .opcode = opcode, - -- case D3D_NAME_INSTANCE_ID: -- case D3D_NAME_PRIMITIVE_ID: -- case D3D_NAME_VERTEX_ID: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; -- break; -+ .dsts[0] = *dst, -+ .dst_count = 1, - -- default: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; -- break; -- } -- -- if (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- { -- enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; -- -- if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) -- { -- mode = VKD3DSIM_CONSTANT; -- } -- else -- { -- static const struct -- { -- unsigned int modifiers; -- enum vkd3d_shader_interpolation_mode mode; -- } -- modes[] = -- { -- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID }, -- { HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE }, -- { HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID }, -- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID }, -- }; -- unsigned int i; -- -- for (i = 0; i < ARRAY_SIZE(modes); ++i) -- { -- if ((var->storage_modifiers & modes[i].modifiers) == modes[i].modifiers) -- { -- mode = modes[i].mode; -- break; -- } -- } -- } -- -- instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -- } -- } -- else -- { -- if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) -- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; -- else -- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; -- } -- -- switch (usage) -- { -- case D3D_NAME_COVERAGE: -- case D3D_NAME_DEPTH: -- case D3D_NAME_DEPTH_GREATER_EQUAL: -- case D3D_NAME_DEPTH_LESS_EQUAL: -- case D3D_NAME_TARGET: -- case D3D_NAME_UNDEFINED: -- break; -- -- default: -- instr.idx_count = 1; -- instr.idx[0] = usage; -- break; -- } -+ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) -+static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags) - { - struct sm4_instruction instr = - { -- .opcode = 
VKD3D_SM4_OP_DCL_TEMPS, -+ .opcode = opcode, - -- .idx = {temp_count}, -+ .dsts[0] = semantic->reg, -+ .dst_count = 1, -+ -+ .idx[0] = semantic->sysval_semantic, - .idx_count = 1, -+ -+ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, -- uint32_t size, uint32_t comp_count) -+static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, -+ .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - -- .idx = {idx, size, comp_count}, -+ .idx = {group_size->x, group_size->y, group_size->z}, - .idx_count = 3, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) -+static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, -- -- .idx[0] = thread_count[0], -- .idx[1] = thread_count[1], -- .idx[2] = thread_count[2], -- .idx_count = 3, -+ .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, -+ .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ret(const struct tpf_writer *tpf) -+static void tpf_write_hs_decls(const struct tpf_compiler *tpf) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM4_OP_RET, -+ .opcode = VKD3D_SM5_OP_HS_DECLS, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) -+static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) - { -- struct sm4_instruction instr; -- -- 
memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask); -- instr.srcs[0].modifiers = src_mod; -- instr.src_count = 1; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) -+static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) - { -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); -- sm4_dst_from_node(&instr.dsts[dst_idx], dst); -- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; -- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -- instr.dsts[1 - dst_idx].reg.idx_count = 0; -- instr.dst_count = 2; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask); -- instr.src_count = 1; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -+static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) - { -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); -- instr.src_count = 2; -+ struct sm4_instruction 
instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, -+ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --/* dp# instructions don't map the swizzle. */ --static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -+static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) - { -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, -+ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, -- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, -- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -+static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) - { -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, -+ .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT, -+ }; - -- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); -- sm4_dst_from_node(&instr.dsts[dst_idx], dst); -- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; -- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -- instr.dsts[1 - dst_idx].reg.idx_count = 0; -- instr.dst_count = 2; 
-+ write_sm4_instruction(tpf, &instr); -+} - -- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[dst_idx].write_mask); -- instr.src_count = 2; -+static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, -+ enum vkd3d_shader_tessellator_partitioning partitioning) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, -+ .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, -- const struct hlsl_ir_node *src3) -+static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, -+ enum vkd3d_shader_tessellator_output_primitive output_primitive) - { -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, -+ .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, -+ }; - -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -+ write_sm4_instruction(tpf, &instr); -+} - -- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask); -- instr.src_count = 3; -+static void write_sm4_ret(const struct tpf_compiler *tpf) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_RET, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct 
hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, - enum hlsl_sampler_dim dim) -@@ -4715,12 +5043,16 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; -+ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; - unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - if (uav) - instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; -+ else if (raw) -+ instr.opcode = VKD3D_SM5_OP_LD_RAW; - else - instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; - -@@ -4769,7 +5101,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - reg->dimension = VSIR_DIMENSION_SCALAR; - reg->u.immconst_u32[0] = index->value.u[0].u; - } -- else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) -+ else if (version->major == 4 && version->minor == 0) - { - hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); - } -@@ -4784,7 +5116,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; -@@ -4821,775 +5153,107 @@ static 
void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ - break; - - default: -- vkd3d_unreachable(); -- } -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7."); -- return; -- } -- } -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -- sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); -- instr.src_count = 3; -- -- if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -- || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) -- { -- sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); -- ++instr.src_count; -- } -- else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) -- { -- sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); -- instr.src_count += 2; -- } -- else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP -- || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) -- { -- sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); -- ++instr.src_count; -- } -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_deref *resource = &load->resource; -- const struct hlsl_ir_node *dst = &load->node; -- struct sm4_instruction instr; -- -- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -- if (dst->data_type->e.numeric.type == 
HLSL_TYPE_UINT) -- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_deref *resource = &load->resource; -- const struct hlsl_ir_node *dst = &load->node; -- struct sm4_instruction instr; -- -- if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER -- || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -- { -- hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); -- return; -- } -- -- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_RESINFO; -- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) -- instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static bool type_is_float(const struct hlsl_type *type) --{ -- return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; --} -- --static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, -- const struct hlsl_ir_node *arg, uint32_t mask) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_AND; -- -- sm4_dst_from_node(&instr.dsts[0], &expr->node); -- instr.dst_count = 1; -- -- 
sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask); -- instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; -- instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR; -- instr.srcs[1].reg.u.immconst_u32[0] = mask; -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) --{ -- static const union -- { -- uint32_t u; -- float f; -- } one = { .f = 1.0 }; -- const struct hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- const struct hlsl_type *src_type = arg1->data_type; -- -- /* Narrowing casts were already lowered. */ -- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -- -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(tpf, expr, arg1, one.u); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_INT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(tpf, expr, arg1, 1); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); -- 
break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_UINT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(tpf, expr, arg1, 1); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); -- break; -- -- case HLSL_TYPE_BOOL: -- /* Casts to bool should have already been lowered. */ -- default: -- vkd3d_unreachable(); -- } --} -- --static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, -- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; -- -- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; -- instr.srcs[0].reg.dimension = 
VSIR_DIMENSION_VEC4; -- instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) --{ -- const struct hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_ir_node *arg2 = expr->operands[1].node; -- const struct hlsl_ir_node *arg3 = expr->operands[2].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- struct vkd3d_string_buffer *dst_type_string; -- -- VKD3D_ASSERT(expr->node.reg.allocated); -- -- if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) -- return; -- -- switch (expr->op) -- { -- case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -- if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) -- write_sm4_rasterizer_sample_count(tpf, &expr->node); -- else -- hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -- "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -- break; -- -- case HLSL_OP1_ABS: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_BIT_NOT: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_CAST: -- write_sm4_cast(tpf, expr); -- break; -- -- case HLSL_OP1_CEIL: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_COS: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); -- break; -- -- case HLSL_OP1_DSX: -- 
VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSX_COARSE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSX_FINE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSY: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSY_COARSE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSY_FINE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_EXP2: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_F16TOF32: -- VKD3D_ASSERT(type_is_float(dst_type)); -- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_FLOOR: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_FRACT: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_LOG2: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_LOGIC_NOT: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_NEG: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_RCP: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- /* SM5 comes with a RCP opcode */ -- if (tpf->ctx->profile->major_version >= 5) -- { -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); -- } -- else -- { -- /* For SM4, implement as DIV dst, 1.0, src */ -- struct sm4_instruction instr; -- struct hlsl_constant_value one; -- -- VKD3D_ASSERT(type_is_float(dst_type)); -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_DIV; -- -- sm4_dst_from_node(&instr.dsts[0], &expr->node); -- instr.dst_count = 1; -- -- for (unsigned int i = 0; i < 4; i++) -- one.u[i].f = 1.0f; -- sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); -- } -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_REINTERPRET: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_ROUND: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_RSQ: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_SAT: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV -- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), -- &expr->node, arg1, 0); -- 
break; -- -- case HLSL_OP1_SIN: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); -- break; -- -- case HLSL_OP1_SQRT: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_TRUNC: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP2_ADD: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_BIT_AND: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_BIT_OR: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_BIT_XOR: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_DIV: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_DOT: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- switch (arg1->data_type->dimx) -- { -- case 4: -- write_sm4_binary_op_dot(tpf, 
VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); -- break; -- -- case 3: -- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); -- break; -- -- case 2: -- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); -- break; -- -- case 1: -- default: -- vkd3d_unreachable(); -- } -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_EQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_GEQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_LESS: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case 
HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_LOGIC_AND: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_LOGIC_OR: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_LSHIFT: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_MAX: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); -- } -- break; -+ vkd3d_unreachable(); -+ } - -- case HLSL_OP2_MIN: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -- break; -+ if (texel_offset) -+ { -+ if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -+ { -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve 
to integer literal in the range -8 to 7."); -+ return; -+ } -+ } - -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); -- break; -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; - -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); -- break; -+ sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -+ sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); -+ instr.src_count = 3; - -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); -- } -- break; -+ if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -+ || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) -+ { -+ sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ ++instr.src_count; -+ } -+ else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) -+ { -+ sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); -+ instr.src_count += 2; -+ } -+ else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP -+ || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) -+ { -+ sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); -+ ++instr.src_count; -+ } - -- case HLSL_OP2_MOD: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -- break; -+ write_sm4_instruction(tpf, &instr); -+} - -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); -- } -- break; -+static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct 
hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; - -- case HLSL_OP2_MUL: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -- break; -+ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- /* Using IMUL instead of UMUL because we're taking the low -- * bits, and the native compiler generates IMUL. */ -- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); -- break; -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) -+ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); -- } -- break; -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; - -- case HLSL_OP2_NEQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -+ sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); -+ instr.src_count = 1; - -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ write_sm4_instruction(tpf, &instr); -+} - -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -- break; -+static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; - -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); -- break; -+ if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER -+ || 
resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ { -+ hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); -+ return; -+ } - -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -+ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - -- case HLSL_OP2_RSHIFT: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -- &expr->node, arg1, arg2); -- break; -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_RESINFO; -+ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) -+ instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - -- case HLSL_OP3_TERNARY: -- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); -- break; -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; - -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -- } -+ sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -+ instr.src_count = 2; - -- hlsl_release_string_buffer(tpf->ctx, dst_type_string); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) -+static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, -+ .opcode = VKD3D_SM4_OP_IF, -+ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - -@@ -5614,7 +5278,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct 
hlsl_ir_if * - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) -+static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) - { - struct sm4_instruction instr = {0}; - -@@ -5650,57 +5314,7 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju - write_sm4_instruction(tpf, &instr); - } - --/* Does this variable's data come directly from the API user, rather than being -- * temporary or from a previous shader stage? -- * I.e. is it a uniform or VS input? */ --static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) --{ -- if (var->is_uniform) -- return true; -- -- return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; --} -- --static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) --{ -- const struct hlsl_type *type = load->node.data_type; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- -- sm4_dst_from_node(&instr.dsts[0], &load->node); -- instr.dst_count = 1; -- -- VKD3D_ASSERT(hlsl_is_numeric_type(type)); -- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) -- { -- struct hlsl_constant_value value; -- -- /* Uniform bools can be specified as anything, but internal bools always -- * have 0 for false and ~0 for true. Normalize that here. 
*/ -- -- instr.opcode = VKD3D_SM4_OP_MOVC; -- -- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); -- -- memset(&value, 0xff, sizeof(value)); -- sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); -- memset(&value, 0, sizeof(value)); -- sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); -- instr.src_count = 3; -- } -- else -- { -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); -- instr.src_count = 1; -- } -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) -+static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) - { - struct sm4_instruction instr = - { -@@ -5715,10 +5329,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - struct vkd3d_shader_src_param *src; - struct sm4_instruction instr; - -@@ -5735,7 +5350,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- if (tpf->ctx->profile->major_version < 5) -+ if (!vkd3d_shader_ver_ge(version, 5, 0)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -@@ -5756,7 +5371,7 @@ 
static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = load->sample_index.node; -@@ -5825,45 +5440,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h - } - } - --static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) --{ -- struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); -- -- if (!store->resource.var->is_uniform) -- { -- hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); -- return; -- } -- -- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -- { -- hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); -- return; -- } -- -- write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); --} -- --static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) --{ -- const struct hlsl_ir_node *rhs = store->rhs.node; -- struct sm4_instruction instr; -- uint32_t writemask; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); -- instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) -+static void 
write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) - { - const struct hlsl_ir_node *selector = s->selector.node; - struct hlsl_ir_switch_case *c; -@@ -5903,30 +5480,176 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) -+static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) - { -- unsigned int hlsl_swizzle; -- struct sm4_instruction instr; -- uint32_t writemask; -+ const struct vkd3d_sm4_opcode_info *info; -+ struct sm4_instruction instr = {0}; -+ unsigned int dst_count, src_count; - -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -+ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); -+ VKD3D_ASSERT(info); - -- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); -- instr.dst_count = 1; -+ dst_count = opcode_info_get_dst_count(info); -+ src_count = opcode_info_get_src_count(info); - -- sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); -- hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), -- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); -- instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); -- instr.src_count = 1; -+ if (ins->dst_count != dst_count) -+ { -+ ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n", -+ ins->dst_count, ins->opcode, dst_count); -+ tpf->result = VKD3D_ERROR_INVALID_SHADER; -+ return; -+ } -+ if (ins->src_count != src_count) -+ { -+ ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n", -+ ins->src_count, ins->opcode, src_count); -+ tpf->result = VKD3D_ERROR_INVALID_SHADER; -+ return; -+ } -+ -+ instr.opcode = info->opcode; -+ instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ instr.dst_count = 
ins->dst_count; -+ instr.src_count = ins->src_count; -+ -+ for (unsigned int i = 0; i < ins->dst_count; ++i) -+ { -+ instr.dsts[i] = ins->dst[i]; -+ -+ if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE) -+ { -+ /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. */ -+ VKD3D_ASSERT(ins->dst_count == 1); -+ instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE; -+ instr.extra_bits |= VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ } -+ } -+ for (unsigned int i = 0; i < ins->src_count; ++i) -+ instr.srcs[i] = ins->src[i]; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) -+static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) -+{ -+ switch (ins->opcode) -+ { -+ case VKD3DSIH_DCL_TEMPS: -+ tpf_dcl_temps(tpf, ins->declaration.count); -+ break; -+ -+ case VKD3DSIH_DCL_INDEXABLE_TEMP: -+ tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT: -+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_PS: -+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_PS_SGV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_PS_SIV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, &ins->declaration.register_semantic, ins->flags); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_SGV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_SIV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_DCL_OUTPUT: -+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, 
&ins->declaration.dst, 0); -+ break; -+ -+ case VKD3DSIH_DCL_OUTPUT_SIV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_ADD: -+ case VKD3DSIH_AND: -+ case VKD3DSIH_DIV: -+ case VKD3DSIH_DP2: -+ case VKD3DSIH_DP3: -+ case VKD3DSIH_DP4: -+ case VKD3DSIH_DSX: -+ case VKD3DSIH_DSX_COARSE: -+ case VKD3DSIH_DSX_FINE: -+ case VKD3DSIH_DSY: -+ case VKD3DSIH_DSY_COARSE: -+ case VKD3DSIH_DSY_FINE: -+ case VKD3DSIH_EQO: -+ case VKD3DSIH_EXP: -+ case VKD3DSIH_F16TOF32: -+ case VKD3DSIH_F32TOF16: -+ case VKD3DSIH_FRC: -+ case VKD3DSIH_FTOI: -+ case VKD3DSIH_FTOU: -+ case VKD3DSIH_GEO: -+ case VKD3DSIH_IADD: -+ case VKD3DSIH_IEQ: -+ case VKD3DSIH_IGE: -+ case VKD3DSIH_ILT: -+ case VKD3DSIH_IMAD: -+ case VKD3DSIH_IMAX: -+ case VKD3DSIH_IMIN: -+ case VKD3DSIH_IMUL: -+ case VKD3DSIH_INE: -+ case VKD3DSIH_INEG: -+ case VKD3DSIH_ISHL: -+ case VKD3DSIH_ISHR: -+ case VKD3DSIH_ITOF: -+ case VKD3DSIH_LOG: -+ case VKD3DSIH_LTO: -+ case VKD3DSIH_MAD: -+ case VKD3DSIH_MAX: -+ case VKD3DSIH_MIN: -+ case VKD3DSIH_MOV: -+ case VKD3DSIH_MOVC: -+ case VKD3DSIH_MUL: -+ case VKD3DSIH_NEU: -+ case VKD3DSIH_NOT: -+ case VKD3DSIH_OR: -+ case VKD3DSIH_RCP: -+ case VKD3DSIH_ROUND_NE: -+ case VKD3DSIH_ROUND_NI: -+ case VKD3DSIH_ROUND_PI: -+ case VKD3DSIH_ROUND_Z: -+ case VKD3DSIH_RSQ: -+ case VKD3DSIH_SAMPLE_INFO: -+ case VKD3DSIH_SINCOS: -+ case VKD3DSIH_SQRT: -+ case VKD3DSIH_STORE_RAW: -+ case VKD3DSIH_STORE_UAV_TYPED: -+ case VKD3DSIH_UDIV: -+ case VKD3DSIH_UGE: -+ case VKD3DSIH_ULT: -+ case VKD3DSIH_UMAX: -+ case VKD3DSIH_UMIN: -+ case VKD3DSIH_USHR: -+ case VKD3DSIH_UTOF: -+ case VKD3DSIH_XOR: -+ tpf_simple_instruction(tpf, ins); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ break; -+ } -+} -+ -+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; -+ unsigned int vsir_instr_idx; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct 
hlsl_ir_node, entry) - { -@@ -5952,10 +5675,6 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - case HLSL_IR_CONSTANT: - vkd3d_unreachable(); - -- case HLSL_IR_EXPR: -- write_sm4_expr(tpf, hlsl_ir_expr(instr)); -- break; -- - case HLSL_IR_IF: - write_sm4_if(tpf, hlsl_ir_if(instr)); - break; -@@ -5964,32 +5683,21 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - write_sm4_jump(tpf, hlsl_ir_jump(instr)); - break; - -- case HLSL_IR_LOAD: -- write_sm4_load(tpf, hlsl_ir_load(instr)); -- break; -- - case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); - break; - -- case HLSL_IR_RESOURCE_STORE: -- write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); -- break; -- - case HLSL_IR_LOOP: - write_sm4_loop(tpf, hlsl_ir_loop(instr)); - break; - -- case HLSL_IR_STORE: -- write_sm4_store(tpf, hlsl_ir_store(instr)); -- break; -- - case HLSL_IR_SWITCH: - write_sm4_switch(tpf, hlsl_ir_switch(instr)); - break; - -- case HLSL_IR_SWIZZLE: -- write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; -+ tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); - break; - - default: -@@ -5998,18 +5706,26 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - } - } - --static void write_sm4_shdr(struct hlsl_ctx *ctx, -- const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) -+static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) - { -- const struct hlsl_profile_info *profile = ctx->profile; -+ if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) -+ tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); -+ -+ write_sm4_block(tpf, &func->body); -+ -+ write_sm4_ret(tpf); -+} -+ -+static void tpf_write_shdr(struct tpf_compiler *tpf, struct 
hlsl_ir_function_decl *entry_func) -+{ -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - struct vkd3d_bytecode_buffer buffer = {0}; - struct extern_resource *extern_resources; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; -- const struct hlsl_scope *scope; -- const struct hlsl_ir_var *var; -+ struct hlsl_ctx *ctx = tpf->ctx; - size_t token_count_position; -- struct tpf_writer tpf; -+ uint32_t global_flags = 0; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { -@@ -6024,17 +5740,54 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - VKD3D_SM4_LIB, - }; - -- tpf_writer_init(&tpf, ctx, &buffer); -+ tpf->buffer = &buffer; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - -- put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); -+ put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); - token_count_position = put_u32(&buffer, 0); - -+ if (version->major == 4) -+ { -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ const struct hlsl_type *type = resource->component_type; -+ -+ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; -+ break; -+ } -+ } -+ } -+ -+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -+ global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; -+ -+ if (global_flags) -+ write_sm4_dcl_global_flags(tpf, global_flags); -+ -+ if (version->type == VKD3D_SHADER_TYPE_HULL) -+ { -+ tpf_write_hs_decls(tpf); -+ -+ tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ -+ tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); -+ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); 
-+ tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); -+ tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); -+ } -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ { -+ tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ -+ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); -+ } -+ - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- write_sm4_dcl_constant_buffer(&tpf, cbuffer); -+ write_sm4_dcl_constant_buffer(tpf, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) -@@ -6042,59 +5795,37 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct extern_resource *resource = &extern_resources[i]; - - if (resource->regset == HLSL_REGSET_SAMPLERS) -- write_sm4_dcl_samplers(&tpf, resource); -+ write_sm4_dcl_samplers(tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -- write_sm4_dcl_textures(&tpf, resource, false); -+ write_sm4_dcl_textures(tpf, resource, false); - else if (resource->regset == HLSL_REGSET_UAVS) -- write_sm4_dcl_textures(&tpf, resource, true); -- } -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -- write_sm4_dcl_semantic(&tpf, var); -+ write_sm4_dcl_textures(tpf, resource, true); - } - -- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -- write_sm4_dcl_thread_group(&tpf, ctx->thread_count); -+ if (version->type == VKD3D_SHADER_TYPE_HULL) -+ tpf_write_hs_control_point_phase(tpf); - -- if (ctx->temp_count) -- write_sm4_dcl_temps(&tpf, ctx->temp_count); -+ tpf_write_shader_function(tpf, entry_func); - -- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ if (version->type == VKD3D_SHADER_TYPE_HULL) - { -- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -- { -- if (var->is_uniform || var->is_input_semantic || 
var->is_output_semantic) -- continue; -- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -- continue; -- -- if (var->indexable) -- { -- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -- -- write_sm4_dcl_indexable_temp(&tpf, id, size, 4); -- } -- } -+ tpf_write_hs_fork_phase(tpf); -+ tpf_write_shader_function(tpf, ctx->patch_constant_func); - } - -- write_sm4_block(&tpf, &entry_func->body); -- -- write_sm4_ret(&tpf); -- - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -- add_section(ctx, dxbc, TAG_SHDR, &buffer); -+ add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); -+ tpf->buffer = NULL; - - sm4_free_extern_resources(extern_resources, extern_resources_count); - } - --static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -+static void tpf_write_sfi0(struct tpf_compiler *tpf) - { - struct extern_resource *extern_resources; - unsigned int extern_resources_count; -+ struct hlsl_ctx *ctx = tpf->ctx; - uint64_t *flags; - - flags = vkd3d_calloc(1, sizeof(*flags)); -@@ -6110,29 +5841,101 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, - * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ - -- if (flags) -- dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); -+ if (*flags) -+ dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags)); - else - vkd3d_free(flags); - } - --int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+static void tpf_write_stat(struct tpf_compiler *tpf) - { -- struct dxbc_writer dxbc; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ const struct sm4_stat *stat = tpf->stat; -+ struct hlsl_ctx *ctx = tpf->ctx; -+ -+ put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); -+ put_u32(&buffer, 0); /* Def count */ -+ put_u32(&buffer, 0); /* DCL count */ -+ put_u32(&buffer, stat->fields[VKD3D_STAT_FLOAT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_INT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_UINT]); -+ put_u32(&buffer, 0); /* Static flow control count */ -+ put_u32(&buffer, 0); /* Dynamic flow control count */ -+ put_u32(&buffer, 0); /* Macro instruction count */ -+ put_u32(&buffer, 0); /* Temp array count */ -+ put_u32(&buffer, 0); /* Array instr count */ -+ put_u32(&buffer, stat->fields[VKD3D_STAT_CUT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_EMIT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_LOAD]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_C]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_BIAS]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_GRAD]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_MOV]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_MOVC]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_CONV]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_BITWISE]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_INPUT_PRIMITIVE]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_OUTPUT_TOPOLOGY]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_VERTICES_OUT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_GATHER]); -+ 
put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); -+ put_u32(&buffer, 0); /* Sample frequency */ -+ -+ if (hlsl_version_ge(ctx, 5, 0)) -+ { -+ put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_OUTPUT_PRIMITIVE]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_PARTITIONING]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_DOMAIN]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_BARRIER]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_ATOMIC]); -+ put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); -+ } -+ -+ add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); -+} -+ -+/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -+ * data from the other parameters instead, so they can be removed from the -+ * arguments and this function can be independent of HLSL structs. */ -+int tpf_compile(struct vsir_program *program, uint64_t config_flags, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+{ -+ enum vkd3d_shader_type shader_type = program->shader_version.type; -+ struct tpf_compiler tpf = {0}; -+ struct sm4_stat stat = {0}; - size_t i; - int ret; - -- dxbc_writer_init(&dxbc); -- -- write_sm4_signature(ctx, &dxbc, false); -- write_sm4_signature(ctx, &dxbc, true); -- write_sm4_rdef(ctx, &dxbc); -- write_sm4_shdr(ctx, entry_func, &dxbc); -- write_sm4_sfi0(ctx, &dxbc); -- -- if (!(ret = ctx->result)) -- ret = dxbc_writer_write(&dxbc, out); -- for (i = 0; i < dxbc.section_count; ++i) -- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); -+ tpf.ctx = ctx; -+ tpf.program = program; -+ tpf.buffer = NULL; -+ tpf.stat = &stat; -+ init_sm4_lookup_tables(&tpf.lookup); -+ dxbc_writer_init(&tpf.dxbc); -+ -+ tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); -+ tpf_write_signature(&tpf, &program->output_signature, 
TAG_OSGN); -+ if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) -+ tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); -+ write_sm4_rdef(ctx, &tpf.dxbc); -+ tpf_write_shdr(&tpf, entry_func); -+ tpf_write_sfi0(&tpf); -+ tpf_write_stat(&tpf); -+ -+ ret = VKD3D_OK; -+ if (ctx->result) -+ ret = ctx->result; -+ if (tpf.result) -+ ret = tpf.result; -+ -+ if (!ret) -+ ret = dxbc_writer_write(&tpf.dxbc, out); -+ for (i = 0; i < tpf.dxbc.section_count; ++i) -+ vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); - return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 306c1ca0dd8..db61eec8f28 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 +23,8 @@ - #include <stdio.h> - #include <math.h> - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -443,30 +445,57 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char - bytecode_set_bytes(buffer, offset, string, length); - } - --static void vkd3d_shader_dump_blob(const char *path, const char *profile, -- const char *suffix, const void *data, size_t size) -+struct shader_dump_data - { -- static unsigned int shader_id = 0; -+ uint8_t checksum[16]; -+ const char *path; -+ const char *profile; -+ const char *source_suffix; -+ const char *target_suffix; -+}; -+ -+static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, -+ const void *data, size_t size, bool source) -+{ -+ static const char hexadecimal_digits[] = "0123456789abcdef"; -+ const uint8_t *checksum = dump_data->checksum; -+ char str_checksum[33]; -+ unsigned int pos = 0; - char filename[1024]; -- unsigned int id; -+ unsigned int i; - FILE *f; - -- id = vkd3d_atomic_increment_u32(&shader_id) - 1; -+ if (!dump_data->path) -+ 
return; -+ -+ for (i = 0; i < ARRAY_SIZE(dump_data->checksum); ++i) -+ { -+ str_checksum[2 * i] = hexadecimal_digits[checksum[i] >> 4]; -+ str_checksum[2 * i + 1] = hexadecimal_digits[checksum[i] & 0xf]; -+ } -+ str_checksum[32] = '\0'; -+ -+ pos = snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s", dump_data->path, str_checksum); - -- if (profile) -- snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u-%s.%s", path, id, profile, suffix); -+ if (dump_data->profile) -+ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); -+ -+ if (source) -+ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix); - else -- snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u.%s", path, id, suffix); -+ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix); -+ -+ TRACE("Dumping shader to \"%s\".\n", filename); - if ((f = fopen(filename, "wb"))) - { - if (fwrite(data, 1, size, f) != size) -- ERR("Failed to write shader to %s.\n", filename); -+ WARN("Failed to write shader to %s.\n", filename); - if (fclose(f)) -- ERR("Failed to close stream %s.\n", filename); -+ WARN("Failed to close stream %s.\n", filename); - } - else - { -- ERR("Failed to open %s for dumping shader.\n", filename); -+ WARN("Failed to open %s for dumping shader.\n", filename); - } - } - -@@ -488,37 +517,61 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t - } - } - --void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info) -+static const char *shader_get_target_type_suffix(enum vkd3d_shader_target_type type) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_TARGET_SPIRV_BINARY: -+ return "spv"; -+ case VKD3D_SHADER_TARGET_SPIRV_TEXT: -+ return "spv.s"; -+ case VKD3D_SHADER_TARGET_D3D_ASM: -+ return "d3d.s"; -+ case VKD3D_SHADER_TARGET_D3D_BYTECODE: -+ return "d3dbc"; -+ case VKD3D_SHADER_TARGET_DXBC_TPF: -+ 
return "dxbc"; -+ case VKD3D_SHADER_TARGET_GLSL: -+ return "glsl"; -+ case VKD3D_SHADER_TARGET_FX: -+ return "fx"; -+ case VKD3D_SHADER_TARGET_MSL: -+ return "msl"; -+ default: -+ FIXME("Unhandled target type %#x.\n", type); -+ return "bin"; -+ } -+} -+ -+static void fill_shader_dump_data(const struct vkd3d_shader_compile_info *compile_info, -+ struct shader_dump_data *data) - { -- const struct vkd3d_shader_code *shader = &compile_info->source; -- const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; -- const struct hlsl_profile_info *profile; -- const char *profile_name = NULL; - static bool enabled = true; -- const char *path; -+ -+ data->path = NULL; - - if (!enabled) - return; - -- if (!(path = getenv("VKD3D_SHADER_DUMP_PATH"))) -+ if (!(data->path = getenv("VKD3D_SHADER_DUMP_PATH"))) - { - enabled = false; - return; - } - -+ data->profile = NULL; - if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { -- if (!(hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) -- return; -- -- if (!(profile = hlsl_get_target_info(hlsl_source_info->profile))) -- return; -+ const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - -- profile_name = profile->name; -+ if ((hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) -+ data->profile = hlsl_source_info->profile; - } - -- vkd3d_shader_dump_blob(path, profile_name, shader_get_source_type_suffix(compile_info->source_type), -- shader->code, shader->size); -+ vkd3d_compute_md5(compile_info->source.code, compile_info->source.size, -+ (uint32_t *)data->checksum, VKD3D_MD5_STANDARD); -+ data->source_suffix = shader_get_source_type_suffix(compile_info->source_type); -+ data->target_suffix = shader_get_target_type_suffix(compile_info->target_type); - } - - static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) -@@ -627,6 +680,53 @@ static int vkd3d_shader_validate_compile_info(const struct vkd3d_shader_compile_ - return VKD3D_OK; - } 
- -+static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) -+{ -+ enum vkd3d_result ret; -+ -+ switch (compile_info->source_type) -+ { -+ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = d3dbc_parse(compile_info, config_flags, message_context, program); -+ break; -+ -+ case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = tpf_parse(compile_info, config_flags, message_context, program); -+ break; -+ -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = dxil_parse(compile_info, config_flags, message_context, program); -+ break; -+ -+ default: -+ ERR("Unsupported source type %#x.\n", compile_info->source_type); -+ ret = VKD3D_ERROR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ if (ret < 0) -+ { -+ WARN("Failed to parse shader.\n"); -+ return ret; -+ } -+ -+ if ((ret = vsir_program_validate(program, config_flags, compile_info->source_name, message_context)) < 0) -+ { -+ WARN("Failed to validate shader after parsing, ret %d.\n", ret); -+ -+ if (TRACE_ON()) -+ vsir_program_trace(program); -+ -+ vsir_program_cleanup(program); -+ return ret; -+ } -+ -+ if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE) -+ ret = vsir_program_transform_early(program, config_flags, compile_info, message_context); -+ return ret; -+} -+ - void vkd3d_shader_free_messages(char *messages) - { - TRACE("messages %p.\n", messages); -@@ -707,6 +807,9 @@ struct vkd3d_shader_scan_context - - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - size_t combined_samplers_size; -+ -+ enum vkd3d_shader_tessellator_output_primitive output_primitive; -+ enum vkd3d_shader_tessellator_partitioning partitioning; - }; - - static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, -@@ -1164,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - VKD3D_SHADER_RESOURCE_BUFFER, 
VKD3D_SHADER_RESOURCE_DATA_UINT, 0, - instruction->declaration.structured_resource.byte_stride, false, instruction->flags); - break; -+ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -+ context->output_primitive = instruction->declaration.tessellator_output_primitive; -+ break; -+ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -+ context->partitioning = instruction->declaration.tessellator_partitioning; -+ break; - case VKD3DSIH_IF: - case VKD3DSIH_IFC: - cf_info = vkd3d_shader_scan_push_cf_info(context); -@@ -1404,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) - { - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; -+ struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; - struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; - struct vkd3d_shader_scan_descriptor_info *descriptor_info; - struct vkd3d_shader_scan_signature_info *signature_info; -@@ -1432,11 +1542,13 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - descriptor_info1 = &local_descriptor_info1; - } - -+ tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); -+ - vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, - descriptor_info1, combined_sampler_info, message_context); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -+ vsir_program_trace(program); - - for (i = 0; i < program->instructions.count; ++i) - { -@@ -1475,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - if (!ret && descriptor_info) - ret = convert_descriptor_info(descriptor_info, descriptor_info1); - -+ if (!ret && tessellation_info) -+ { -+ tessellation_info->output_primitive = context.output_primitive; -+ tessellation_info->partitioning = context.partitioning; -+ } -+ - if (ret < 0) - { - if 
(combined_sampler_info) -@@ -1497,6 +1615,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) - { - struct vkd3d_shader_message_context message_context; -+ struct shader_dump_data dump_data; - int ret; - - TRACE("compile_info %p, messages %p.\n", compile_info, messages); -@@ -1511,7 +1630,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - -- vkd3d_shader_dump_shader(compile_info); -+ fill_shader_dump_data(compile_info, &dump_data); -+ vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); - - if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { -@@ -1523,31 +1643,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - uint64_t config_flags = vkd3d_shader_init_config_flags(); - struct vsir_program program; - -- switch (compile_info->source_type) -- { -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = tpf_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = dxil_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- default: -- ERR("Unsupported source type %#x.\n", compile_info->source_type); -- ret = VKD3D_ERROR_INVALID_ARGUMENT; -- break; -- } -- -- if (ret < 0) -- { -- WARN("Failed to parse shader.\n"); -- } -- else -+ if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) - { - ret = vsir_program_scan(&program, compile_info, &message_context, NULL); - vsir_program_cleanup(&program); -@@ -1565,6 +1661,7 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - const 
struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context) - { -+ struct vkd3d_shader_scan_combined_resource_sampler_info combined_sampler_info; - struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_shader_compile_info scan_info; - int ret; -@@ -1578,9 +1675,14 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - break; - - case VKD3D_SHADER_TARGET_GLSL: -+ combined_sampler_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO; -+ combined_sampler_info.next = scan_info.next; -+ scan_info.next = &combined_sampler_info; - if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) - return ret; -- ret = glsl_compile(program, config_flags, compile_info, out, message_context); -+ ret = glsl_compile(program, config_flags, &scan_descriptor_info, -+ &combined_sampler_info, compile_info, out, message_context); -+ vkd3d_shader_free_scan_combined_resource_sampler_info(&combined_sampler_info); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - -@@ -1593,6 +1695,13 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - -+ case VKD3D_SHADER_TARGET_MSL: -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) -+ return ret; -+ ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, out, message_context); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -+ break; -+ - default: - /* Validation should prevent us from reaching this. 
*/ - vkd3d_unreachable(); -@@ -1620,6 +1729,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, char **messages) - { - struct vkd3d_shader_message_context message_context; -+ struct shader_dump_data dump_data; - int ret; - - TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); -@@ -1634,48 +1744,32 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - -- vkd3d_shader_dump_shader(compile_info); -+ fill_shader_dump_data(compile_info, &dump_data); -+ vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); - - if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { - ret = compile_hlsl(compile_info, out, &message_context); - } -+ else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX) -+ { -+ ret = fx_parse(compile_info, out, &message_context); -+ } - else - { - uint64_t config_flags = vkd3d_shader_init_config_flags(); - struct vsir_program program; - -- switch (compile_info->source_type) -- { -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = tpf_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = dxil_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- default: -- ERR("Unsupported source type %#x.\n", compile_info->source_type); -- ret = VKD3D_ERROR_INVALID_ARGUMENT; -- break; -- } -- -- if (ret < 0) -- { -- WARN("Failed to parse shader.\n"); -- } -- else -+ if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) - { - ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context); - vsir_program_cleanup(&program); - } - } - -+ if (ret >= 0) 
-+ vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); -+ - vkd3d_shader_message_context_trace_messages(&message_context); - if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) - ret = VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1777,6 +1871,8 @@ void shader_signature_cleanup(struct shader_signature *signature) - } - vkd3d_free(signature->elements); - signature->elements = NULL; -+ signature->elements_capacity = 0; -+ signature->element_count = 0; - } - - int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, -@@ -1868,6 +1964,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns - #ifdef VKD3D_SHADER_UNSUPPORTED_DXIL - VKD3D_SHADER_SOURCE_DXBC_DXIL, - #endif -+ VKD3D_SHADER_SOURCE_FX, - }; - - TRACE("count %p.\n", count); -@@ -1888,6 +1985,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - VKD3D_SHADER_TARGET_D3D_ASM, - #ifdef VKD3D_SHADER_UNSUPPORTED_GLSL - VKD3D_SHADER_TARGET_GLSL, -+#endif -+#ifdef VKD3D_SHADER_UNSUPPORTED_MSL -+ VKD3D_SHADER_TARGET_MSL, - #endif - }; - -@@ -1923,6 +2023,11 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - }; - #endif - -+ static const enum vkd3d_shader_target_type fx_types[] = -+ { -+ VKD3D_SHADER_TARGET_D3D_ASM, -+ }; -+ - TRACE("source_type %#x, count %p.\n", source_type, count); - - switch (source_type) -@@ -1945,6 +2050,10 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - return dxbc_dxil_types; - #endif - -+ case VKD3D_SHADER_SOURCE_FX: -+ *count = ARRAY_SIZE(fx_types); -+ return fx_types; -+ - default: - *count = 0; - return NULL; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index ef66a8ca07a..ad04972b3fb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -59,6 +59,9 @@ - #define 
VKD3D_VEC4_SIZE 4 - #define VKD3D_DVEC2_SIZE 2 - -+#define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) -+#define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) -+ - enum vkd3d_shader_error - { - VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, -@@ -80,6 +83,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, - VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, - VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, -+ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL = 1010, - - VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, - VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, -@@ -152,6 +156,13 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, - VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, - VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE = 5033, -+ VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE = 5034, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN = 5035, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT = 5036, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038, -+ VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -159,8 +170,11 @@ enum vkd3d_shader_error - VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, - VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, - VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, -+ VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE = 5306, - - VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, -+ VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND = 6001, -+ VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED = 6002, - - VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF = 7000, - VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN = 7001, -@@ -169,6 +183,11 @@ enum vkd3d_shader_error - 
VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, - VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, - VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE = 7007, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT = 7008, -+ VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED = 7009, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE = 7010, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_WRITEMASK = 7011, - - VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, - -@@ -225,8 +244,18 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, - VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, - VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, -+ VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, - - VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, -+ -+ VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000, -+ VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND = 10001, -+ -+ VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, -+ VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, -+ VKD3D_SHADER_ERROR_FX_INVALID_DATA = 11002, - }; - - enum vkd3d_shader_opcode -@@ -556,6 +585,8 @@ enum vkd3d_shader_opcode - VKD3DSIH_XOR, - - VKD3DSIH_INVALID, -+ -+ VKD3DSIH_COUNT, - }; - - enum vkd3d_shader_register_type -@@ -619,12 +650,20 @@ enum vkd3d_shader_register_type - VKD3DSPR_WAVELANECOUNT, - VKD3DSPR_WAVELANEINDEX, - VKD3DSPR_PARAMETER, -+ VKD3DSPR_POINT_COORD, - - VKD3DSPR_COUNT, - - VKD3DSPR_INVALID = ~0u, - }; - -+enum vsir_rastout_register -+{ -+ VSIR_RASTOUT_POSITION = 0x0, -+ VSIR_RASTOUT_FOG = 0x1, -+ VSIR_RASTOUT_POINT_SIZE = 0x2, -+}; -+ - enum vkd3d_shader_register_precision - { - VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, -@@ -642,9 +681,6 @@ enum vkd3d_data_type - { - VKD3D_DATA_FLOAT, - VKD3D_DATA_INT, -- VKD3D_DATA_RESOURCE, -- VKD3D_DATA_SAMPLER, -- VKD3D_DATA_UAV, - VKD3D_DATA_UINT, - VKD3D_DATA_UNORM, - VKD3D_DATA_SNORM, -@@ -740,7 
+776,7 @@ enum vkd3d_shader_interpolation_mode - VKD3DSIM_COUNT = 8, - }; - --enum vkd3d_shader_global_flags -+enum vsir_global_flags - { - VKD3DSGF_REFACTORING_ALLOWED = 0x01, - VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x02, -@@ -1042,6 +1078,9 @@ enum vkd3d_shader_input_sysval_semantic - - struct signature_element - { -+ /* sort_index is not a property of the signature element, it is just a -+ * convenience field used to retain the original order in a signature and -+ * recover it after having permuted the signature itself. */ - unsigned int sort_index; - const char *semantic_name; - unsigned int semantic_index; -@@ -1080,6 +1119,8 @@ static inline bool vsir_sysval_semantic_is_clip_cull(enum vkd3d_shader_sysval_se - - struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature, - unsigned int reg_idx, unsigned int write_mask); -+bool vsir_signature_find_sysval(const struct shader_signature *signature, -+ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); - void shader_signature_cleanup(struct shader_signature *signature); - - struct dxbc_shader_desc -@@ -1145,7 +1186,7 @@ struct vkd3d_shader_tgsm_structured - bool zero_init; - }; - --struct vkd3d_shader_thread_group_size -+struct vsir_thread_group_size - { - unsigned int x, y, z; - }; -@@ -1210,7 +1251,7 @@ struct vkd3d_shader_instruction - const struct vkd3d_shader_src_param *predicate; - union - { -- enum vkd3d_shader_global_flags global_flags; -+ enum vsir_global_flags global_flags; - struct vkd3d_shader_semantic semantic; - struct vkd3d_shader_register_semantic register_semantic; - struct vkd3d_shader_primitive_type primitive_type; -@@ -1224,7 +1265,7 @@ struct vkd3d_shader_instruction - struct vkd3d_shader_structured_resource structured_resource; - struct vkd3d_shader_tgsm_raw tgsm_raw; - struct vkd3d_shader_tgsm_structured tgsm_structured; -- struct vkd3d_shader_thread_group_size thread_group_size; -+ struct 
vsir_thread_group_size thread_group_size; - enum vkd3d_tessellator_domain tessellator_domain; - enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive; - enum vkd3d_shader_tessellator_partitioning tessellator_partitioning; -@@ -1344,8 +1385,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins - struct vkd3d_shader_immediate_constant_buffer *icb); - bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, - unsigned int dst, unsigned int src); --struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -- struct vkd3d_shader_instruction_array *instructions); - void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); - - enum vkd3d_shader_config_flags -@@ -1353,6 +1392,19 @@ enum vkd3d_shader_config_flags - VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, - }; - -+enum vsir_control_flow_type -+{ -+ VSIR_CF_STRUCTURED, -+ VSIR_CF_BLOCKS, -+}; -+ -+enum vsir_normalisation_level -+{ -+ VSIR_NOT_NORMALISED, -+ VSIR_NORMALISED_HULL_CONTROL_POINT_IO, -+ VSIR_FULLY_NORMALISED_IO, -+}; -+ - struct vsir_program - { - struct vkd3d_shader_version shader_version; -@@ -1367,11 +1419,19 @@ struct vsir_program - bool free_parameters; - - unsigned int input_control_point_count, output_control_point_count; -+ struct vsir_thread_group_size thread_group_size; - unsigned int flat_constant_count[3]; - unsigned int block_count; - unsigned int temp_count; - unsigned int ssa_count; -+ enum vsir_global_flags global_flags; - bool use_vocp; -+ bool has_point_size; -+ bool has_point_coord; -+ uint8_t diffuse_written_mask; -+ enum vsir_control_flow_type cf_type; -+ enum vsir_normalisation_level normalisation_level; -+ enum vkd3d_tessellator_domain tess_domain; - - const char **block_names; - size_t block_name_count; -@@ -1384,11 +1444,19 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - const struct 
vkd3d_shader_parameter1 *vsir_program_get_parameter( - const struct vsir_program *program, enum vkd3d_shader_parameter_name name); - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_version *version, unsigned int reserve); --enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -+ enum vsir_normalisation_level normalisation_level); -+enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); -+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, - const char *source_name, struct vkd3d_shader_message_context *message_context); -+struct vkd3d_shader_src_param *vsir_program_create_outpointid_param( -+ struct vsir_program *program); -+bool vsir_instruction_init_with_params(struct vsir_program *program, -+ struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -+ enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count); - - static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( - struct vsir_program *program, unsigned int count) -@@ -1417,12 +1485,6 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_pr - void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); - --static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags) --{ -- return vsir_program_validate(parser->program, config_flags, -- parser->location.source_name, parser->message_context); --} -- - struct vkd3d_shader_descriptor_info1 - { - enum vkd3d_shader_descriptor_type type; -@@ -1445,7 +1507,7 @@ struct vkd3d_shader_scan_descriptor_info1 - unsigned int descriptor_count; - }; - --void vkd3d_shader_trace(const struct vsir_program *program); -+void vsir_program_trace(const struct vsir_program *program); - - const char *shader_get_type_prefix(enum vkd3d_shader_type type); - -@@ -1465,6 +1527,7 @@ enum vsir_asm_flags - { - VSIR_ASM_FLAG_NONE = 0, - VSIR_ASM_FLAG_DUMP_TYPES = 0x1, -+ VSIR_ASM_FLAG_DUMP_ALL_INDICES = 0x2, - }; - - enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, -@@ -1549,18 +1612,30 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st - void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, - enum vkd3d_shader_error error, const char *format, va_list args); - --void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); - uint64_t vkd3d_shader_init_config_flags(void); - void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); - #define vkd3d_shader_trace_text(text, size) \ - vkd3d_shader_trace_text_(text, size, __FUNCTION__) - -+bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -+bool sm1_usage_from_semantic_name(const char *semantic_name, -+ uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); -+bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, -+ const char *semantic_name, bool 
output, enum vkd3d_shader_register_type *type, bool *has_idx); -+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); -+bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, -+ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, -+ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); -+ - int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); - int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); - int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -+int fx_parse(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); - -@@ -1570,8 +1645,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); - - int glsl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -- struct vkd3d_shader_message_context *message_context); -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, -+ const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - #define SPIRV_MAX_SRC_COUNT 6 - -@@ -1580,7 +1657,18 @@ int 
spirv_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - --void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); -+int msl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context); -+ -+enum vkd3d_md5_variant -+{ -+ VKD3D_MD5_STANDARD, -+ VKD3D_MD5_DXBC, -+}; -+ -+void vkd3d_compute_md5(const void *dxbc, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant); - - int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -@@ -1853,7 +1941,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, - #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) - #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) - --#define DXBC_MAX_SECTION_COUNT 5 -+#define DXBC_MAX_SECTION_COUNT 7 - - struct dxbc_writer - { -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index dcc7690876f..a55a97f6f2f 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -19,6 +19,7 @@ - */ - - #include "vkd3d_private.h" -+#include <math.h> - - static void d3d12_fence_incref(struct d3d12_fence *fence); - static void d3d12_fence_decref(struct d3d12_fence *fence); -@@ -2004,6 +2005,8 @@ static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *li - - vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, - state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); -+ memset(bindings->vk_uav_counter_views, 0, -+ 
state->uav_counters.binding_count * sizeof(*bindings->vk_uav_counter_views)); - bindings->uav_counters_dirty = true; - } - } -@@ -2451,6 +2454,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - } - - list->is_recording = false; -+ list->has_depth_bounds = false; - - if (!list->is_valid) - { -@@ -2479,7 +2483,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - list->fb_layer_count = 0; - - list->xfb_enabled = false; -- -+ list->has_depth_bounds = false; - list->is_predicated = false; - - list->current_framebuffer = VK_NULL_HANDLE; -@@ -2793,39 +2797,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - /* We use separate bindings for buffer and texture SRVs/UAVs. - * See d3d12_root_signature_init(). For unbounded ranges the - * descriptors exist in two consecutive sets, otherwise they occur -- * in pairs in one set. */ -- if (range->descriptor_count == UINT_MAX) -- { -- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -- { -- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; -- vk_descriptor_write->dstBinding = 0; -- } -- } -- else -- { -- if (!use_array) -- vk_descriptor_write->dstBinding = vk_binding + 2 * index; -- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -- ++vk_descriptor_write->dstBinding; -- } -- -+ * as consecutive ranges within a set. 
*/ - if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - { - vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; -+ break; -+ } -+ -+ if (range->descriptor_count == UINT_MAX) -+ { -+ vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; -+ vk_descriptor_write->dstBinding = 0; - } - else - { -- vk_image_info->sampler = VK_NULL_HANDLE; -- vk_image_info->imageView = u.view->v.u.vk_image_view; -- vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV -- ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; -- -- vk_descriptor_write->pImageInfo = vk_image_info; -+ vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; - } -+ -+ vk_image_info->sampler = VK_NULL_HANDLE; -+ vk_image_info->imageView = u.view->v.u.vk_image_view; -+ vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV -+ ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; -+ -+ vk_descriptor_write->pImageInfo = vk_image_info; - break; - - case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: -@@ -3078,7 +3073,7 @@ done: - vkd3d_free(vk_descriptor_writes); - } - --static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, -+static void d3d12_command_list_update_virtual_descriptors(struct d3d12_command_list *list, - enum vkd3d_pipeline_bind_point bind_point) - { - struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; -@@ -3210,6 +3205,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) - - static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) - { -+ if (!list->device->use_vk_heaps) -+ return; -+ - if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) - { - if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -@@ -3296,6 +3294,15 @@ static void 
d3d12_command_list_update_heap_descriptors(struct d3d12_command_list - d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); - } - -+static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, -+ enum vkd3d_pipeline_bind_point bind_point) -+{ -+ if (list->device->use_vk_heaps) -+ d3d12_command_list_update_heap_descriptors(list, bind_point); -+ else -+ d3d12_command_list_update_virtual_descriptors(list, bind_point); -+} -+ - static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) - { - d3d12_command_list_end_current_render_pass(list); -@@ -3303,7 +3310,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l - if (!d3d12_command_list_update_compute_pipeline(list)) - return false; - -- list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); -+ d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); - - return true; - } -@@ -3320,7 +3327,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list - if (!d3d12_command_list_update_current_framebuffer(list)) - return false; - -- list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); -+ d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); - - if (list->current_render_pass != VK_NULL_HANDLE) - return true; -@@ -3351,6 +3358,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list - list->xfb_enabled = true; - } - -+ if (graphics->ds_desc.depthBoundsTestEnable && !list->has_depth_bounds) -+ { -+ list->has_depth_bounds = true; -+ VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, 0.0f, 1.0f)); -+ } -+ - return true; - } - -@@ -4791,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; - const struct vkd3d_vk_device_procs *vk_procs; - VkBuffer buffers[ARRAY_SIZE(list->strides)]; -+ struct d3d12_device 
*device = list->device; -+ unsigned int i, stride, max_view_count; - struct d3d12_resource *resource; - bool invalidate = false; -- unsigned int i, stride; - - TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); - -- vk_procs = &list->device->vk_procs; -- null_resources = &list->device->null_resources; -- gpu_va_allocator = &list->device->gpu_va_allocator; -+ vk_procs = &device->vk_procs; -+ null_resources = &device->null_resources; -+ gpu_va_allocator = &device->gpu_va_allocator; - - if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides))) - { -@@ -4807,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - return; - } - -+ max_view_count = device->vk_info.device_limits.maxVertexInputBindings; -+ if (start_slot < max_view_count) -+ max_view_count -= start_slot; -+ else -+ max_view_count = 0; -+ -+ /* Although simply skipping unsupported binding slots isn't especially -+ * likely to work well in the general case, applications sometimes -+ * explicitly set all 32 vertex buffer bindings slots supported by -+ * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is -+ * an example of such an application. 
*/ -+ if (view_count > max_view_count) -+ { -+ for (i = max_view_count; i < view_count; ++i) -+ { -+ if (views && views[i].BufferLocation) -+ WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i); -+ } -+ view_count = max_view_count; -+ } -+ - for (i = 0; i < view_count; ++i) - { - if (views && views[i].BufferLocation) -@@ -5939,7 +5974,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr - static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, - FLOAT min, FLOAT max) - { -- FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); -+ const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; -+ -+ TRACE("iface %p, min %.8e, max %.8e.\n", iface, min, max); -+ -+ if (isnan(max)) -+ max = 0.0f; -+ if (isnan(min)) -+ min = 0.0f; -+ -+ if (!list->device->vk_info.EXT_depth_range_unrestricted && (min < 0.0f || min > 1.0f || max < 0.0f || max > 1.0f)) -+ { -+ WARN("VK_EXT_depth_range_unrestricted was not found, clamping depth bounds to 0.0 and 1.0.\n"); -+ max = vkd3d_clamp(max, 0.0f, 1.0f); -+ min = vkd3d_clamp(min, 0.0f, 1.0f); -+ } -+ -+ list->has_depth_bounds = true; -+ VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, min, max)); - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, -@@ -6189,8 +6242,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d - - list->allocator = allocator; - -- list->update_descriptors = device->use_vk_heaps ? 
d3d12_command_list_update_heap_descriptors -- : d3d12_command_list_update_descriptors; - list->descriptor_heap_count = 0; - - if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 01841c89692..e92373a36fa 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -102,6 +102,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), - VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), - VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), -+ VK_EXTENSION(EXT_DEPTH_RANGE_UNRESTRICTED, EXT_depth_range_unrestricted), - VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), - VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), - VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), -@@ -135,7 +136,8 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - }; - -- if (device->vk_info.EXT_mutable_descriptor_type && index && index != VKD3D_SET_INDEX_UAV_COUNTER -+ if (device->vk_info.EXT_mutable_descriptor_type -+ && index != VKD3D_SET_INDEX_MUTABLE && index != VKD3D_SET_INDEX_UAV_COUNTER - && device->vk_descriptor_heap_layouts[index].applicable_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - device->vk_descriptor_heap_layouts[index].vk_set_layout = VK_NULL_HANDLE; -@@ -143,7 +145,7 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic - } - - binding.binding = 0; -- binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && !index) -+ binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && index == VKD3D_SET_INDEX_MUTABLE) - ? 
VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[index].type; - binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; - binding.stageFlags = VK_SHADER_STAGE_ALL; -@@ -199,14 +201,20 @@ static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device - { - static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = - { -- {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, -- /* UAV counters */ -- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_UNIFORM_BUFFER] = -+ {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER] = -+ {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_SAMPLED_IMAGE] = -+ {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER] = -+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_STORAGE_IMAGE] = -+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_SAMPLER] = -+ {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, -+ [VKD3D_SET_INDEX_UAV_COUNTER] = -+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, - }; - const struct vkd3d_device_descriptor_limits *limits = 
&device->vk_info.descriptor_limits; - enum vkd3d_vk_descriptor_set_index set; -@@ -589,7 +597,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - } - if (!create_info->pfn_create_thread != !create_info->pfn_join_thread) - { -- ERR("Invalid create/join thread function pointers.\n"); -+ WARN("Invalid create/join thread function pointers.\n"); - return E_INVALIDARG; - } - if (create_info->wchar_size != 2 && create_info->wchar_size != 4) -@@ -607,7 +615,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - - if (FAILED(hr = vkd3d_init_vk_global_procs(instance, create_info->pfn_vkGetInstanceProcAddr))) - { -- ERR("Failed to initialise Vulkan global procs, hr %s.\n", debugstr_hresult(hr)); -+ WARN("Failed to initialise Vulkan global procs, hr %s.\n", debugstr_hresult(hr)); - return hr; - } - -@@ -689,7 +697,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - vkd3d_free(extensions); - if (vr < 0) - { -- ERR("Failed to create Vulkan instance, vr %d.\n", vr); -+ WARN("Failed to create Vulkan instance, vr %d.\n", vr); - if (instance->libvulkan) - vkd3d_dlclose(instance->libvulkan); - return hresult_from_vk_result(vr); -@@ -697,7 +705,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - - if (FAILED(hr = vkd3d_load_vk_instance_procs(&instance->vk_procs, vk_global_procs, vk_instance))) - { -- ERR("Failed to load instance procs, hr %s.\n", debugstr_hresult(hr)); -+ WARN("Failed to load instance procs, hr %s.\n", debugstr_hresult(hr)); - if (instance->vk_procs.vkDestroyInstance) - instance->vk_procs.vkDestroyInstance(vk_instance, NULL); - if (instance->libvulkan) -@@ -1572,6 +1580,111 @@ static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, - return S_OK; - } - -+static void vkd3d_override_caps(struct d3d12_device *device) -+{ -+ const char *caps_override, *p; -+ -+ static const struct override_value -+ { -+ const char *str; -+ uint32_t value; -+ } -+ 
feature_level_override_values[] = -+ { -+ {"11.0", D3D_FEATURE_LEVEL_11_0}, -+ {"11.1", D3D_FEATURE_LEVEL_11_1}, -+ {"12.0", D3D_FEATURE_LEVEL_12_0}, -+ {"12.1", D3D_FEATURE_LEVEL_12_1}, -+ {"12.2", D3D_FEATURE_LEVEL_12_2}, -+ }, -+ resource_binding_tier_override_values[] = -+ { -+ {"1", D3D12_RESOURCE_BINDING_TIER_1}, -+ {"2", D3D12_RESOURCE_BINDING_TIER_2}, -+ {"3", D3D12_RESOURCE_BINDING_TIER_3}, -+ }; -+ static const struct override_field -+ { -+ const char *name; -+ size_t offset; -+ const struct override_value *values; -+ size_t value_count; -+ } -+ override_fields[] = -+ { -+ { -+ "feature_level", -+ offsetof(struct d3d12_device, vk_info.max_feature_level), -+ feature_level_override_values, -+ ARRAY_SIZE(feature_level_override_values) -+ }, -+ { -+ "resource_binding_tier", -+ offsetof(struct d3d12_device, feature_options.ResourceBindingTier), -+ resource_binding_tier_override_values, -+ ARRAY_SIZE(resource_binding_tier_override_values) -+ }, -+ }; -+ -+ if (!(caps_override = getenv("VKD3D_CAPS_OVERRIDE"))) -+ return; -+ -+ p = caps_override; -+ for (;;) -+ { -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(override_fields); ++i) -+ { -+ const struct override_field *field = &override_fields[i]; -+ size_t len = strlen(field->name); -+ -+ if (strncmp(p, field->name, len) == 0 && p[len] == '=') -+ { -+ size_t j; -+ -+ p += len + 1; -+ -+ for (j = 0; j < field->value_count; ++j) -+ { -+ const struct override_value *value = &field->values[j]; -+ size_t value_len = strlen(value->str); -+ -+ if (strncmp(p, value->str, value_len) == 0 -+ && (p[value_len] == '\0' || p[value_len] == ',')) -+ { -+ memcpy(&((uint8_t *)device)[field->offset], (uint8_t *)&value->value, sizeof(value->value)); -+ -+ p += value_len; -+ if (p[0] == '\0') -+ { -+ TRACE("Overriding caps with: %s\n", caps_override); -+ return; -+ } -+ p += 1; -+ -+ break; -+ } -+ } -+ -+ if (j == field->value_count) -+ { -+ WARN("Cannot parse the override caps string: %s\n", caps_override); -+ return; -+ } -+ -+ 
break; -+ } -+ } -+ -+ if (i == ARRAY_SIZE(override_fields)) -+ { -+ WARN("Cannot parse the override caps string: %s\n", caps_override); -+ return; -+ } -+ } -+} -+ - static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - const struct vkd3d_device_create_info *create_info, - struct vkd3d_physical_device_info *physical_device_info, -@@ -1583,7 +1696,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; - VkPhysicalDevice physical_device = device->vk_physical_device; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -- VkExtensionProperties *vk_extensions; -+ VkExtensionProperties *vk_extensions = NULL; - VkPhysicalDeviceFeatures *features; - uint32_t vk_extension_count; - HRESULT hr; -@@ -1741,6 +1854,9 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vulkan_info->EXT_shader_viewport_index_layer; - - vkd3d_init_feature_level(vulkan_info, features, &device->feature_options); -+ -+ vkd3d_override_caps(device); -+ - if (vulkan_info->max_feature_level < create_info->minimum_feature_level) - { - WARN("Feature level %#x is not supported.\n", create_info->minimum_feature_level); -@@ -1809,6 +1925,28 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - && descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind - && descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind; - -+ if (device->use_vk_heaps && device->vk_info.KHR_push_descriptor) -+ { -+ /* VKD3D_SET_INDEX_COUNT for the Vulkan heaps, one for the push -+ * descriptors set and one for the static samplers set. */ -+ unsigned int descriptor_set_count = VKD3D_SET_INDEX_COUNT + 2; -+ -+ /* A mutable descriptor set can replace all those that should otherwise -+ * back the SRV-UAV-CBV descriptor heap. 
*/ -+ if (device->vk_info.EXT_mutable_descriptor_type) -+ descriptor_set_count -= VKD3D_SET_INDEX_COUNT - (VKD3D_SET_INDEX_MUTABLE + 1); -+ -+ /* For many Vulkan implementations maxBoundDescriptorSets == 8; also, -+ * if mutable descriptors are not available the descriptor set count -+ * will be 9; so saving a descriptor set is going to be often -+ * significant. */ -+ if (descriptor_set_count > device->vk_info.device_limits.maxBoundDescriptorSets) -+ { -+ WARN("Disabling VK_KHR_push_descriptor to save a descriptor set.\n"); -+ device->vk_info.KHR_push_descriptor = VK_FALSE; -+ } -+ } -+ - if (device->use_vk_heaps) - vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->descriptor_indexing_properties); -@@ -1816,6 +1954,13 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); - -+ TRACE("Device %p: using %s descriptor heaps, with%s descriptor indexing, " -+ "with%s push descriptors, with%s mutable descriptors\n", -+ device, device->use_vk_heaps ? "Vulkan" : "virtual", -+ device->vk_info.EXT_descriptor_indexing ? "" : "out", -+ device->vk_info.KHR_push_descriptor ? "" : "out", -+ device->vk_info.EXT_mutable_descriptor_type ? "" : "out"); -+ - vkd3d_chain_physical_device_info_structures(physical_device_info, device); - - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 6d6820d3752..1f7d90eb95f 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -2498,7 +2498,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea - enum vkd3d_vk_descriptor_set_index set, end; - unsigned int i = writes->count; - -- end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_UNIFORM_BUFFER -+ end = device->vk_info.EXT_mutable_descriptor_type ? 
VKD3D_SET_INDEX_MUTABLE - : VKD3D_SET_INDEX_STORAGE_IMAGE; - /* Binding a shader with the wrong null descriptor type works in Windows. - * To support that here we must write one to all applicable Vulkan sets. */ -@@ -4250,7 +4250,8 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descrip - if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type - && device->vk_descriptor_heap_layouts[set].vk_set_layout) - { -- pool_sizes[pool_desc.poolSizeCount].type = (device->vk_info.EXT_mutable_descriptor_type && !set) -+ pool_sizes[pool_desc.poolSizeCount].type = -+ (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE) - ? VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[set].type; - pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; - } -@@ -4280,11 +4281,12 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript - - if (!device->vk_descriptor_heap_layouts[set].vk_set_layout) - { -- /* Set 0 uses mutable descriptors, and this set is unused. */ -- if (!descriptor_heap->vk_descriptor_sets[0].vk_set -- && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0))) -+ /* Mutable descriptors are in use, and this set is unused. 
*/ -+ if (!descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set -+ && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, -+ device, VKD3D_SET_INDEX_MUTABLE))) - return hr; -- descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[0].vk_set; -+ descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set; - descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; - return S_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 682d488faa8..2b0f81d3812 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi - } - } - -+static VkShaderStageFlags stage_flags_from_vkd3d_shader_visibility(enum vkd3d_shader_visibility visibility) -+{ -+ switch (visibility) -+ { -+ case VKD3D_SHADER_VISIBILITY_ALL: -+ return VK_SHADER_STAGE_ALL; -+ case VKD3D_SHADER_VISIBILITY_VERTEX: -+ return VK_SHADER_STAGE_VERTEX_BIT; -+ case VKD3D_SHADER_VISIBILITY_HULL: -+ return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; -+ case VKD3D_SHADER_VISIBILITY_DOMAIN: -+ return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; -+ case VKD3D_SHADER_VISIBILITY_GEOMETRY: -+ return VK_SHADER_STAGE_GEOMETRY_BIT; -+ case VKD3D_SHADER_VISIBILITY_PIXEL: -+ return VK_SHADER_STAGE_FRAGMENT_BIT; -+ case VKD3D_SHADER_VISIBILITY_COMPUTE: -+ return VK_SHADER_STAGE_COMPUTE_BIT; -+ default: -+ FIXME("Unhandled visibility %#x.\n", visibility); -+ return VKD3D_SHADER_VISIBILITY_ALL; -+ } -+} -+ - static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility) - { - switch (visibility) -@@ -260,23 +284,6 @@ static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d - } - } - --static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type) --{ -- switch (type) -- { -- /* SRV and 
UAV root parameters are buffer views. */ -- case D3D12_ROOT_PARAMETER_TYPE_SRV: -- return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -- case D3D12_ROOT_PARAMETER_TYPE_UAV: -- return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -- case D3D12_ROOT_PARAMETER_TYPE_CBV: -- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -- default: -- FIXME("Unhandled descriptor root parameter type %#x.\n", type); -- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- } --} -- - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( - D3D12_DESCRIPTOR_RANGE_TYPE type) - { -@@ -313,20 +320,6 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p - } - } - --static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc, -- enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility, -- bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count) --{ -- binding_desc->binding = vk_binding; -- binding_desc->descriptorType -- = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer); -- binding_desc->descriptorCount = descriptor_count; -- binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility); -- binding_desc->pImmutableSamplers = NULL; -- -- return true; --} -- - struct d3d12_root_signature_info - { - size_t binding_count; -@@ -719,18 +712,66 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat - return S_OK; - } - -+struct vk_binding_array -+{ -+ VkDescriptorSetLayoutBinding *bindings; -+ size_t capacity, count; -+ -+ unsigned int table_index; -+ unsigned int unbounded_offset; -+ VkDescriptorSetLayoutCreateFlags flags; -+}; -+ -+static void vk_binding_array_cleanup(struct vk_binding_array *array) -+{ -+ vkd3d_free(array->bindings); -+ array->bindings = NULL; -+} -+ -+static bool vk_binding_array_add_binding(struct vk_binding_array *array, -+ VkDescriptorType descriptor_type, unsigned int descriptor_count, 
-+ VkShaderStageFlags stage_flags, const VkSampler *immutable_sampler, unsigned int *binding_idx) -+{ -+ unsigned int binding_count = array->count; -+ VkDescriptorSetLayoutBinding *binding; -+ -+ if (!vkd3d_array_reserve((void **)&array->bindings, &array->capacity, -+ array->count + 1, sizeof(*array->bindings))) -+ { -+ ERR("Failed to reallocate the Vulkan binding array.\n"); -+ return false; -+ } -+ -+ *binding_idx = binding_count; -+ binding = &array->bindings[binding_count]; -+ binding->binding = binding_count; -+ binding->descriptorType = descriptor_type; -+ binding->descriptorCount = descriptor_count; -+ binding->stageFlags = stage_flags; -+ binding->pImmutableSamplers = immutable_sampler; -+ ++array->count; -+ -+ return true; -+} -+ - struct vkd3d_descriptor_set_context - { -- VkDescriptorSetLayoutBinding *current_binding; -- VkDescriptorSetLayoutBinding *first_binding; -+ struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; - unsigned int table_index; - unsigned int unbounded_offset; - unsigned int descriptor_index; - unsigned int uav_counter_index; - unsigned int push_constant_index; -- uint32_t descriptor_binding; - }; - -+static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) -+{ -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(context->vk_bindings); ++i) -+ vk_binding_array_cleanup(&context->vk_bindings[i]); -+} -+ - static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count) - { - uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, device->vk_info.device_limits.maxBoundDescriptorSets); -@@ -738,63 +779,63 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns - if (set_count > max_count) - { - /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ -- ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); -+ WARN("Required descriptor set count exceeds maximum allowed count of %u.\n", 
max_count); - return false; - } - - return true; - } - --static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, -- VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, -- const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout); -- --static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature, -- struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags) -+static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( -+ struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) - { -- struct d3d12_descriptor_set_layout *layout; -- unsigned int index; -- HRESULT hr; -- -- if (!context->descriptor_binding) -- return S_OK; -+ if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) -+ return NULL; - -- index = root_signature->vk_set_count; -- layout = &root_signature->descriptor_set_layouts[index]; -+ return &context->vk_bindings[root_signature->vk_set_count]; -+} - -- if (!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1)) -- return E_INVALIDARG; -+static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, -+ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) -+{ -+ struct vk_binding_array *array; - -- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding, -- context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout))) -- return hr; -- layout->table_index = context->table_index; -- layout->unbounded_offset = context->unbounded_offset; -- ++root_signature->vk_set_count; -+ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) -+ return; - -- context->current_binding = context->first_binding; -- context->descriptor_binding = 0; -+ 
array->table_index = context->table_index; -+ array->unbounded_offset = context->unbounded_offset; -+ array->flags = flags; - -- return S_OK; -+ ++root_signature->vk_set_count; - } - - static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, -- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, -- bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, -- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) -+ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, -+ unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, -+ unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, -+ const VkSampler *immutable_sampler, unsigned int *binding_idx) - { - struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets - ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; -- struct vkd3d_shader_resource_binding *mapping -- = &root_signature->descriptor_mapping[context->descriptor_index++]; -+ struct vkd3d_shader_resource_binding *mapping; -+ struct vk_binding_array *array; -+ unsigned int idx; -+ -+ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) -+ || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], -+ vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, -+ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) -+ return E_OUTOFMEMORY; - -+ mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; - mapping->type = descriptor_type; - mapping->register_space = register_space; - mapping->register_index = register_idx; - mapping->shader_visibility = shader_visibility; - mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - mapping->binding.set = root_signature->vk_set_count; -- mapping->binding.binding = context->descriptor_binding++; -+ mapping->binding.binding = idx; - mapping->binding.count = descriptor_count; - if (offset) - { -@@ -803,37 +844,11 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur - } - - if (context->unbounded_offset != UINT_MAX) -- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -- -- return S_OK; --} -- --static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, -- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, -- unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, -- enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, -- uint32_t *first_binding) --{ -- unsigned int i; -- HRESULT hr; -- -- is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; -- duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV -- || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- && duplicate_descriptors; -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - -- *first_binding = context->descriptor_binding; -- for (i = 0; i < binding_count; ++i) -- { -- if (duplicate_descriptors -- && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -- register_space, base_register_idx + i, true, shader_visibility, 1, context))) -- return hr; -+ if (binding_idx) -+ *binding_idx = idx; - -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) -- return hr; -- } - return S_OK; - } - -@@ -895,38 +910,41 @@ static unsigned int 
vk_binding_count_from_descriptor_range(const struct d3d12_ro - return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - } - --static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, -+static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, - const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, -+ unsigned int vk_binding_array_count, unsigned int bindings_per_range, - struct vkd3d_descriptor_set_context *context) - { - enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); -- bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; -+ bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; - enum vkd3d_shader_descriptor_type descriptor_type = range->type; -+ unsigned int i, register_space = range->register_space; - HRESULT hr; - - if (range->descriptor_count == UINT_MAX) - context->unbounded_offset = range->offset; - -- if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ for (i = 0; i < bindings_per_range; ++i) - { -- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, -- descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count)) -- return E_NOTIMPL; -- ++context->current_binding; -- -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ register_space, range->base_register_idx + i, is_buffer, shader_visibility, -+ vk_binding_array_count, context, NULL, NULL))) - return hr; - } - -- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, -- descriptor_type, visibility, is_buffer, 
context->descriptor_binding, range->vk_binding_count)) -- return E_NOTIMPL; -- ++context->current_binding; -+ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ { -+ context->unbounded_offset = UINT_MAX; -+ return S_OK; -+ } - -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) -- return hr; -+ for (i = 0; i < bindings_per_range; ++i) -+ { -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ register_space, range->base_register_idx + i, false, shader_visibility, -+ vk_binding_array_count, context, NULL, NULL))) -+ return hr; -+ } - - context->unbounded_offset = UINT_MAX; - -@@ -998,7 +1016,7 @@ static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d - } - else - { -- binding->set = 0; -+ binding->set = VKD3D_SET_INDEX_MUTABLE; - descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; - } - } -@@ -1107,18 +1125,19 @@ static int compare_descriptor_range(const void *a, const void *b) - if ((ret = vkd3d_u32_compare(range_a->offset, range_b->offset))) - return ret; - -- return (range_a->descriptor_count == UINT_MAX) - (range_b->descriptor_count == UINT_MAX); -+ /* Place bounded ranges after unbounded ones of equal offset, -+ * so the bounded range can be mapped to the unbounded one. 
*/ -+ return (range_b->descriptor_count == UINT_MAX) - (range_a->descriptor_count == UINT_MAX); - } - - static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, - struct vkd3d_descriptor_set_context *context) - { -+ unsigned int i, j, range_count, bindings_per_range, vk_binding_array_count; - const struct d3d12_device *device = root_signature->device; - bool use_vk_heaps = root_signature->device->use_vk_heaps; - struct d3d12_root_descriptor_table *table; -- unsigned int i, j, k, range_count; -- uint32_t vk_binding; - HRESULT hr; - - root_signature->descriptor_table_mask = 0; -@@ -1175,7 +1194,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - for (j = 0; j < range_count; ++j) - { - struct d3d12_root_descriptor_table_range *range; -- VkDescriptorSetLayoutBinding *cur_binding; - - range = &table->ranges[j]; - -@@ -1221,53 +1239,23 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - base_range = range; - } - -- range->binding = context->descriptor_binding; - range->vk_binding_count = vk_binding_count_from_descriptor_range(range, - info, &device->vk_info.descriptor_limits); -- -- if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, -- range, p->ShaderVisibility, context))) -- return hr; -- -- continue; -+ vk_binding_array_count = range->vk_binding_count; -+ bindings_per_range = 1; - } -- -- cur_binding = context->current_binding; -- -- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, -- range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, -- shader_visibility, context, &vk_binding))) -- return hr; -- -- /* Unroll descriptor range. 
*/ -- for (k = 0; k < range->descriptor_count; ++k) -+ else - { -- uint32_t vk_current_binding = vk_binding + k; -- -- if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV -- || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- { -- vk_current_binding = vk_binding + 2 * k; -- -- /* Assign binding for image view. */ -- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, -- range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1)) -- return E_NOTIMPL; -- -- ++cur_binding; -- } -- -- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, -- range->type, p->ShaderVisibility, true, vk_current_binding, 1)) -- return E_NOTIMPL; -- -- ++cur_binding; -+ range->vk_binding_count = range->descriptor_count; -+ vk_binding_array_count = 1; -+ bindings_per_range = range->descriptor_count; - } - -- table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count; -- table->ranges[j].binding = vk_binding; -+ range->binding = context->vk_bindings[root_signature->vk_set_count].count; - -- context->current_binding = cur_binding; -+ if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, -+ p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) -+ return hr; - } - ++context->push_constant_index; - } -@@ -1278,8 +1266,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) - { -- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; -- unsigned int i; -+ unsigned int binding, i; - HRESULT hr; - - root_signature->push_descriptor_mask = 0; -@@ -1294,23 +1281,16 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign - - root_signature->push_descriptor_mask |= 1u << i; - -- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if 
(FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, - vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), -- p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, -- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) -+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, -+ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) - return hr; -- cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); -- cur_binding->descriptorCount = 1; -- cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); -- cur_binding->pImmutableSamplers = NULL; - - root_signature->parameters[i].parameter_type = p->ParameterType; -- root_signature->parameters[i].u.descriptor.binding = cur_binding->binding; -- -- ++cur_binding; -+ root_signature->parameters[i].u.descriptor.binding = binding; - } - -- context->current_binding = cur_binding; - return S_OK; - } - -@@ -1318,7 +1298,6 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, - struct vkd3d_descriptor_set_context *context) - { -- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; - unsigned int i; - HRESULT hr; - -@@ -1330,21 +1309,15 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) - return hr; - -- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, -- VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, -- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, 
s->ShaderRegister, false, -+ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, -+ &root_signature->static_samplers[i], NULL))) - return hr; -- cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; -- cur_binding->descriptorCount = 1; -- cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); -- cur_binding->pImmutableSamplers = &root_signature->static_samplers[i]; -- -- ++cur_binding; - } - -- context->current_binding = cur_binding; - if (device->use_vk_heaps) -- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - - return S_OK; - } -@@ -1477,26 +1450,57 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, - return S_OK; - } - -+static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, -+ struct vkd3d_descriptor_set_context *context) -+{ -+ unsigned int i; -+ HRESULT hr; -+ -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); -+ -+ if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) -+ return E_INVALIDARG; -+ -+ for (i = 0; i < root_signature->vk_set_count; ++i) -+ { -+ struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; -+ struct vk_binding_array *array = &context->vk_bindings[i]; -+ -+ VKD3D_ASSERT(array->count); -+ -+ if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, -+ array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) -+ return hr; -+ layout->unbounded_offset = array->unbounded_offset; -+ layout->table_index = array->table_index; -+ } -+ -+ return S_OK; -+} -+ - static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, - VkDescriptorSetLayout *vk_set_layouts) - { - const struct d3d12_device 
*device = root_signature->device; - enum vkd3d_vk_descriptor_set_index set; -+ VkDescriptorSetLayout vk_set_layout; - unsigned int i; - - for (i = 0; i < root_signature->vk_set_count; ++i) - vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout; - -- if (device->use_vk_heaps) -+ if (!device->use_vk_heaps) -+ return i; -+ -+ for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) - { -- VkDescriptorSetLayout mutable_layout = device->vk_descriptor_heap_layouts[0].vk_set_layout; -+ vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout; - -- for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) -- { -- VkDescriptorSetLayout vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout; -- /* All layouts must be valid, so if null, just set it to the mutable one. */ -- vk_set_layouts[i++] = vk_set_layout ? vk_set_layout : mutable_layout; -- } -+ VKD3D_ASSERT(vk_set_layout); -+ vk_set_layouts[i++] = vk_set_layout; -+ -+ if (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE) -+ break; - } - - return i; -@@ -1508,7 +1512,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; - const struct vkd3d_vulkan_info *vk_info = &device->vk_info; - struct vkd3d_descriptor_set_context context; -- VkDescriptorSetLayoutBinding *binding_desc; - struct d3d12_root_signature_info info; - bool use_vk_heaps; - unsigned int i; -@@ -1516,7 +1519,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - - memset(&context, 0, sizeof(context)); - context.unbounded_offset = UINT_MAX; -- binding_desc = NULL; - - root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; - root_signature->refcount = 1; -@@ -1578,20 +1580,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - sizeof(*root_signature->static_samplers)))) 
- goto fail; - -- if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc)))) -- goto fail; -- context.first_binding = binding_desc; -- context.current_binding = binding_desc; -- - if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) - goto fail; - - /* We use KHR_push_descriptor for root descriptor parameters. */ - if (vk_info->KHR_push_descriptor) - { -- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, -- &context, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR))) -- goto fail; -+ d3d12_root_signature_append_vk_binding_array(root_signature, -+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); - } - - root_signature->main_set = root_signature->vk_set_count; -@@ -1607,11 +1603,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - if (use_vk_heaps) - d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); - -- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) -+ if (FAILED(hr = d3d12_root_signature_create_descriptor_set_layouts(root_signature, &context))) - goto fail; - -- vkd3d_free(binding_desc); -- binding_desc = NULL; -+ descriptor_set_context_cleanup(&context); - - i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); - if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, -@@ -1627,7 +1622,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - return S_OK; - - fail: -- vkd3d_free(binding_desc); -+ descriptor_set_context_cleanup(&context); - d3d12_root_signature_cleanup(root_signature, device); - return hr; - } -@@ -2286,7 +2281,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, - - const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, -+ 
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, - {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, - {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, - {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)}, -@@ -2341,7 +2336,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - - const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, - {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, - }; - -@@ -3867,6 +3862,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, -+ VK_DYNAMIC_STATE_DEPTH_BOUNDS, - }; - static const VkPipelineDynamicStateCreateInfo dynamic_desc = - { -@@ -4019,7 +4015,7 @@ static int compile_hlsl_cs(const struct vkd3d_shader_code *hlsl, struct vkd3d_sh - - static const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, - }; - - info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; -diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index 831dc07af56..839bb173854 100644 ---- a/libs/vkd3d/libs/vkd3d/utils.c -+++ b/libs/vkd3d/libs/vkd3d/utils.c -@@ -703,7 +703,7 @@ const char *debug_vk_extent_3d(VkExtent3D extent) - - const char *debug_vk_queue_flags(VkQueueFlags flags) - { -- char buffer[159]; -+ char buffer[191]; - - buffer[0] = '\0'; - #define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; } -@@ -715,6 +715,7 @@ const char *debug_vk_queue_flags(VkQueueFlags flags) - #undef FLAG_TO_STR - #define FLAG_TO_STR(f, n) if (flags & 
f) { strcat(buffer, " | "#n); flags &= ~f; } - FLAG_TO_STR(0x20, VK_QUEUE_VIDEO_DECODE_BIT_KHR) -+ FLAG_TO_STR(0x40, VK_QUEUE_VIDEO_ENCODE_BIT_KHR) - #undef FLAG_TO_STR - if (flags) - FIXME("Unrecognized flag(s) %#x.\n", flags); -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 9eccec111c7..5215cf8ef86 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -415,6 +415,7 @@ HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZ - if (FAILED(hr = d3d12_versioned_root_signature_deserializer_init(object, &dxbc))) - { - vkd3d_free(object); -+ *deserializer = NULL; - return hr; - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index ba4e2e8488d..97a99782d6a 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -131,6 +131,7 @@ struct vkd3d_vulkan_info - bool EXT_calibrated_timestamps; - bool EXT_conditional_rendering; - bool EXT_debug_marker; -+ bool EXT_depth_range_unrestricted; - bool EXT_depth_clip_enable; - bool EXT_descriptor_indexing; - bool EXT_fragment_shader_interlock; -@@ -771,14 +772,21 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev - - enum vkd3d_vk_descriptor_set_index - { -- VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, -- VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, -- VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, -- VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, -- VKD3D_SET_INDEX_STORAGE_IMAGE = 4, -- VKD3D_SET_INDEX_SAMPLER = 5, -- VKD3D_SET_INDEX_UAV_COUNTER = 6, -- VKD3D_SET_INDEX_COUNT = 7 -+ VKD3D_SET_INDEX_SAMPLER, -+ VKD3D_SET_INDEX_UAV_COUNTER, -+ VKD3D_SET_INDEX_MUTABLE, -+ -+ /* These are used when mutable descriptors are not available to back -+ * SRV-UAV-CBV descriptor heaps. They must stay at the end of this -+ * enumeration, so that they can be ignored when mutable descriptors are -+ * used. 
*/ -+ VKD3D_SET_INDEX_UNIFORM_BUFFER = VKD3D_SET_INDEX_MUTABLE, -+ VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, -+ VKD3D_SET_INDEX_SAMPLED_IMAGE, -+ VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, -+ VKD3D_SET_INDEX_STORAGE_IMAGE, -+ -+ VKD3D_SET_INDEX_COUNT - }; - - extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; -@@ -1254,7 +1262,7 @@ struct d3d12_command_list - VkFormat dsv_format; - - bool xfb_enabled; -- -+ bool has_depth_bounds; - bool is_predicated; - - VkFramebuffer current_framebuffer; -@@ -1271,7 +1279,6 @@ struct d3d12_command_list - VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; - VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; - -- void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); - struct d3d12_descriptor_heap *descriptor_heaps[64]; - unsigned int descriptor_heap_count; - --- -2.45.2 - diff --git a/staging/upstream-commit b/staging/upstream-commit index 565fc3e0..a3833332 100644 --- a/staging/upstream-commit +++ b/staging/upstream-commit @@ -1 +1 @@ -51ccd95c49c2c61ad41960b25a01f834601d70c0 +96a94318125332cd7aa80c475b35e38439b7c2b3