From aea35abd0efd5cca9e6af5d894539fcb6de6784e Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Sep 2024 07:18:49 +1000 Subject: [PATCH] Updated vkd3d to 03ad04c89004c7f800c5b1a0ea7ba28622916328. --- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 4 +- libs/vkd3d/include/vkd3d_shader.h | 159 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/checksum.c | 49 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 94 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1270 ++----- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 21 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 116 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 1001 ++++- libs/vkd3d/libs/vkd3d-shader/glsl.c | 2107 ++++++++++- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 349 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 134 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1046 ++++-- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1789 ++++++++- .../libs/vkd3d-shader/hlsl_constant_ops.c | 20 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 3347 +++++++++++------ libs/vkd3d/libs/vkd3d-shader/msl.c | 881 +++++ libs/vkd3d/libs/vkd3d-shader/preproc.h | 3 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 56 +- libs/vkd3d/libs/vkd3d-shader/preproc.y | 13 - libs/vkd3d/libs/vkd3d-shader/spirv.c | 433 ++- libs/vkd3d/libs/vkd3d-shader/tpf.c | 1360 +++++-- .../libs/vkd3d-shader/vkd3d_shader_main.c | 145 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 105 +- libs/vkd3d/libs/vkd3d/command.c | 91 +- libs/vkd3d/libs/vkd3d/device.c | 1 + libs/vkd3d/libs/vkd3d/state.c | 383 +- libs/vkd3d/libs/vkd3d/utils.c | 3 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 1 + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 4 +- 32 files changed, 11274 insertions(+), 3716 deletions(-) create mode 100644 libs/vkd3d/libs/vkd3d-shader/msl.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 94e4833dc9a..b073790d986 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -25,6 +25,7 @@ SOURCES = \ 
libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ libs/vkd3d-shader/ir.c \ + libs/vkd3d-shader/msl.c \ libs/vkd3d-shader/preproc.l \ libs/vkd3d-shader/preproc.y \ libs/vkd3d-shader/spirv.c \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 39145a97df1..fd62730f948 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -62,6 +62,8 @@ #define VKD3D_STRINGIFY(x) #x #define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) +#define vkd3d_clamp(value, lower, upper) max(min(value, upper), lower) + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') @@ -273,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index d9a355d3bc9..5c0d13ea9e2 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -190,6 +190,17 @@ enum vkd3d_shader_compile_option_backward_compatibility * - DEPTH to SV_Depth for pixel shader outputs. */ VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES = 0x00000001, + /** + * Causes 'double' to behave as an alias for 'float'. This option only + * applies to HLSL sources with shader model 1-3 target profiles. Without + * this option using the 'double' type produces compilation errors in + * these target profiles. + * + * This option is disabled by default. 
+ * + * \since 1.14 + */ + VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS = 0x00000002, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY), }; @@ -469,8 +480,8 @@ enum vkd3d_shader_parameter_type /** The parameter value is embedded directly in the shader. */ VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, /** - * The parameter value is provided to the shader via a specialization - * constant. This value is only supported for the SPIR-V target type. + * The parameter value is provided to the shader via specialization + * constants. This value is only supported for the SPIR-V target type. */ VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, /** @@ -495,6 +506,13 @@ enum vkd3d_shader_parameter_data_type VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, /** The parameter is provided as a 32-bit float. \since 1.13 */ VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, + /** + * The parameter is provided as a 4-dimensional vector of 32-bit floats. + * This parameter must be used with struct vkd3d_shader_parameter1; + * it cannot be used with struct vkd3d_shader_parameter. + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), }; @@ -578,6 +596,110 @@ enum vkd3d_shader_parameter_name * \since 1.13 */ VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, + /** + * A mask of enabled clip planes. + * + * When this parameter is provided to a vertex shader, for each nonzero bit + * of this mask, a user clip distance will be generated from vertex position + * in clip space, and the clip plane defined by the indexed vector, taken + * from the VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_# parameter. + * + * Regardless of the specific clip planes which are enabled, the clip + * distances which are output are a contiguous array starting from clip + * distance 0. This affects the interface of OpenGL. 
For example, if only + * clip planes 1 and 3 are enabled (and so the value of the mask is 0xa), + * the user should enable only GL_CLIP_DISTANCE0 and GL_CLIP_DISTANCE1. + * + * The default value is zero, i.e. do not enable any clip planes. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. + * + * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this + * version of vkd3d-shader. + * + * If the source shader writes clip distances and this parameter is nonzero, + * compilation fails. + * + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK, + /** + * Clip plane values. + * See VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK for documentation of + * clip planes. + * + * These enum values are contiguous and arithmetic may safely be performed + * on them. That is, VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_[n] is + * VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 plus n. + * + * The data type for each parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. + * + * The default value for each plane is a (0, 0, 0, 0) vector. + * + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_1, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_2, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_3, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_4, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_5, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_6, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_7, + /** + * Point size. + * + * When this parameter is provided to a vertex, tessellation, or geometry + * shader, and the source shader does not write point size, it specifies a + * uniform value which will be written to point size. + * If the source shader writes point size, this parameter is ignored. 
+ * + * This parameter can be used to implement fixed function point size, as + * present in Direct3D versions 8 and 9, if the target environment does not + * support point size as part of its own fixed-function API (as Vulkan and + * core OpenGL). + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, + /** + * Minimum point size. + * + * When this parameter is provided to a vertex, tessellation, or geometry + * shader, and the source shader writes point size or uses the + * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE parameter, the point size will + * be clamped to the provided minimum value. + * If point size is not written in one of these ways, + * this parameter is ignored. + * If this parameter is not provided, the point size will not be clamped + * to a minimum size by vkd3d-shader. + * + * This parameter can be used to implement fixed function point size, as + * present in Direct3D versions 8 and 9, if the target environment does not + * support point size as part of its own fixed-function API (as Vulkan and + * core OpenGL). + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, + /** + * Maximum point size. + * + * This parameter has identical behaviour to + * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, except that it provides + * the maximum size rather than the minimum. + * + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; @@ -625,6 +747,13 @@ struct vkd3d_shader_parameter_immediate_constant1 * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. */ float f32; + /** + * A pointer to the value if the parameter's data type is + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. 
+ * + * \since 1.14 + */ + float f32_vec4[4]; void *_pointer_pad; uint32_t _pad[4]; } u; @@ -636,7 +765,13 @@ struct vkd3d_shader_parameter_immediate_constant1 */ struct vkd3d_shader_parameter_specialization_constant { - /** The ID of the specialization constant. */ + /** + * The ID of the specialization constant. + * If the type comprises more than one constant, such as + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4, then a contiguous + * array of specialization constants should be used, one for each component, + * and this ID should point to the first component. + */ uint32_t id; }; @@ -1046,6 +1181,11 @@ enum vkd3d_shader_source_type * the format used for Direct3D shader model 6 shaders. \since 1.9 */ VKD3D_SHADER_SOURCE_DXBC_DXIL, + /** + * Binary format used by Direct3D 9/10.x/11 effects. + * Input is a raw FX section without container. \since 1.14 + */ + VKD3D_SHADER_SOURCE_FX, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), }; @@ -1087,6 +1227,10 @@ enum vkd3d_shader_target_type * Output is a raw FX section without container. \since 1.11 */ VKD3D_SHADER_TARGET_FX, + /** + * A 'Metal Shading Language' shader. \since 1.14 + */ + VKD3D_SHADER_TARGET_MSL, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE), }; @@ -1292,7 +1436,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, * vkd3d_shader_preprocess_info. * * \param code Contents of the included file, which were allocated by the - * \ref pfn_open_include callback. The user must free them. + * vkd3d_shader_preprocess_info.pfn_open_include callback. + * The user must free them. * * \param context The user-defined pointer passed to struct * vkd3d_shader_preprocess_info. @@ -1319,8 +1464,8 @@ struct vkd3d_shader_preprocess_info /** * Pointer to an array of predefined macros. Each macro in this array will - * be expanded as if a corresponding #define statement were prepended to the - * source code. 
+ * be expanded as if a corresponding \#define statement were prepended to + * the source code. * * If the same macro is specified multiple times, only the last value is * used. @@ -2798,7 +2943,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ * \param input_signature The input signature of the second shader. * * \param count On output, contains the number of entries written into - * \ref varyings. + * "varyings". * * \param varyings Pointer to an output array of varyings. * This must point to space for N varyings, where N is the number of elements diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c index d9560628c77..45de1c92513 100644 --- a/libs/vkd3d/libs/vkd3d-shader/checksum.c +++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c @@ -33,6 +33,11 @@ * will fill a supplied 16-byte array with the digest. */ +/* + * DXBC uses a variation of the MD5 algorithm, which only changes the way + * the message is padded in the final step. 
+ */ + #include "vkd3d_shader_private.h" #define DXBC_CHECKSUM_BLOCK_SIZE 64 @@ -230,10 +235,9 @@ static void md5_update(struct md5_ctx *ctx, const unsigned char *buf, unsigned i memcpy(ctx->in, buf, len); } -static void dxbc_checksum_final(struct md5_ctx *ctx) +static void md5_final(struct md5_ctx *ctx, enum vkd3d_md5_variant variant) { unsigned int padding; - unsigned int length; unsigned int count; unsigned char *p; @@ -260,7 +264,7 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) /* Now fill the next block */ memset(ctx->in, 0, DXBC_CHECKSUM_BLOCK_SIZE); } - else + else if (variant == VKD3D_MD5_DXBC) { /* Make place for bitcount at the beginning of the block */ memmove(&ctx->in[4], ctx->in, count); @@ -268,33 +272,44 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) /* Pad block to 60 bytes */ memset(p + 4, 0, padding - 4); } + else + { + /* Pad block to 56 bytes */ + memset(p, 0, padding - 8); + } /* Append length in bits and transform */ - length = ctx->i[0]; - memcpy(&ctx->in[0], &length, sizeof(length)); - byte_reverse(&ctx->in[4], 14); - length = ctx->i[0] >> 2 | 0x1; - memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); + if (variant == VKD3D_MD5_DXBC) + { + unsigned int length; + + length = ctx->i[0]; + memcpy(&ctx->in[0], &length, sizeof(length)); + byte_reverse(&ctx->in[4], 14); + length = ctx->i[0] >> 2 | 0x1; + memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); + } + else + { + byte_reverse(ctx->in, 14); + + ((unsigned int *)ctx->in)[14] = ctx->i[0]; + ((unsigned int *)ctx->in)[15] = ctx->i[1]; + } md5_transform(ctx->buf, (unsigned int *)ctx->in); byte_reverse((unsigned char *)ctx->buf, 4); memcpy(ctx->digest, ctx->buf, 16); } -#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 - -void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) +void vkd3d_compute_md5(const void *data, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant) { - const uint8_t *ptr = dxbc; + 
const uint8_t *ptr = data; struct md5_ctx ctx; - VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); - ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; - size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; - md5_init(&ctx); md5_update(&ctx, ptr, size); - dxbc_checksum_final(&ctx); + md5_final(&ctx, variant); memcpy(checksum, ctx.digest, sizeof(ctx.digest)); } diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 77e9711300f..38d566d9fe0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -79,7 +79,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DCL_INDEXABLE_TEMP ] = "dcl_indexableTemp", [VKD3DSIH_DCL_INPUT ] = "dcl_input", [VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT ] = "dcl_input_control_point_count", - [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputPrimitive", + [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputprimitive", [VKD3DSIH_DCL_INPUT_PS ] = "dcl_input_ps", [VKD3DSIH_DCL_INPUT_PS_SGV ] = "dcl_input_ps_sgv", [VKD3DSIH_DCL_INPUT_PS_SIV ] = "dcl_input_ps_siv", @@ -89,7 +89,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DCL_OUTPUT ] = "dcl_output", [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT ] = "dcl_output_control_point_count", [VKD3DSIH_DCL_OUTPUT_SIV ] = "dcl_output_siv", - [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputTopology", + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputtopology", [VKD3DSIH_DCL_RESOURCE_RAW ] = "dcl_resource_raw", [VKD3DSIH_DCL_RESOURCE_STRUCTURED ] = "dcl_resource_structured", [VKD3DSIH_DCL_SAMPLER ] = "dcl_sampler", @@ -104,7 +104,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DCL_UAV_RAW ] = "dcl_uav_raw", [VKD3DSIH_DCL_UAV_STRUCTURED ] = "dcl_uav_structured", [VKD3DSIH_DCL_UAV_TYPED ] = "dcl_uav_typed", - [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxOutputVertexCount", + [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxout", [VKD3DSIH_DDIV ] = "ddiv", [VKD3DSIH_DEF ] = "def", [VKD3DSIH_DEFAULT ] = "default", @@ -675,9 +675,6 @@ static void 
shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum { [VKD3D_DATA_FLOAT ] = "float", [VKD3D_DATA_INT ] = "int", - [VKD3D_DATA_RESOURCE ] = "resource", - [VKD3D_DATA_SAMPLER ] = "sampler", - [VKD3D_DATA_UAV ] = "uav", [VKD3D_DATA_UINT ] = "uint", [VKD3D_DATA_UNORM ] = "unorm", [VKD3D_DATA_SNORM ] = "snorm", @@ -1229,8 +1226,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const case VKD3D_DATA_INT: shader_print_int_literal(compiler, "", reg->u.immconst_u32[0], ""); break; - case VKD3D_DATA_RESOURCE: - case VKD3D_DATA_SAMPLER: case VKD3D_DATA_UINT: shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); break; @@ -1266,8 +1261,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[2], ""); shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[3], ""); break; - case VKD3D_DATA_RESOURCE: - case VKD3D_DATA_SAMPLER: case VKD3D_DATA_UINT: shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[1], ""); @@ -1319,6 +1312,23 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const } vkd3d_string_buffer_printf(buffer, ")"); } + else if (compiler->flags & VSIR_ASM_FLAG_DUMP_ALL_INDICES) + { + unsigned int i = 0; + + if (reg->idx_count == 0 || reg->idx[0].rel_addr) + { + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + } + else + { + vkd3d_string_buffer_printf(buffer, "%u%s", offset, compiler->colours.reset); + i = 1; + } + + for (; i < reg->idx_count; ++i) + shader_print_subscript(compiler, reg->idx[i].offset, reg->idx[i].rel_addr); + } else if (reg->type != VKD3DSPR_RASTOUT && reg->type != VKD3DSPR_MISCTYPE && reg->type != VKD3DSPR_NULL @@ -2258,7 +2268,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic } } -static enum vkd3d_result dump_signature(struct 
vkd3d_d3d_asm_compiler *compiler, +static enum vkd3d_result dump_dxbc_signature(struct vkd3d_d3d_asm_compiler *compiler, const char *name, const char *register_name, const struct shader_signature *signature) { struct vkd3d_string_buffer *buffer = &compiler->buffer; @@ -2325,21 +2335,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, return VKD3D_OK; } -static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, +static enum vkd3d_result dump_dxbc_signatures(struct vkd3d_d3d_asm_compiler *compiler, const struct vsir_program *program) { enum vkd3d_result ret; - if ((ret = dump_signature(compiler, ".input", + if ((ret = dump_dxbc_signature(compiler, ".input", program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", &program->input_signature)) < 0) return ret; - if ((ret = dump_signature(compiler, ".output", "o", + if ((ret = dump_dxbc_signature(compiler, ".output", "o", &program->output_signature)) < 0) return ret; - if ((ret = dump_signature(compiler, ".patch_constant", + if ((ret = dump_dxbc_signature(compiler, ".patch_constant", program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", &program->patch_constant_signature)) < 0) return ret; @@ -2427,7 +2437,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, * doesn't even have an explicit concept of signature. */ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) { - if ((result = dump_signatures(&compiler, program)) < 0) + if ((result = dump_dxbc_signatures(&compiler, program)) < 0) { vkd3d_string_buffer_cleanup(buffer); return result; @@ -2489,12 +2499,58 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, return result; } -void vkd3d_shader_trace(const struct vsir_program *program) +/* This is meant exclusively for development use. 
Therefore, differently from + * dump_dxbc_signature(), it doesn't try particularly hard to make the output + * nice or easily parsable, and it dumps all fields, not just the DXBC ones. + * This format isn't meant to be stable. */ +static void trace_signature(const struct shader_signature *signature, const char *signature_type) { - const char *p, *q, *end; + struct vkd3d_string_buffer buffer; + unsigned int i; + + TRACE("%s signature:%s\n", signature_type, signature->element_count == 0 ? " empty" : ""); + + vkd3d_string_buffer_init(&buffer); + + for (i = 0; i < signature->element_count; ++i) + { + const struct signature_element *element = &signature->elements[i]; + + vkd3d_string_buffer_clear(&buffer); + + vkd3d_string_buffer_printf(&buffer, "Element %u: %s %u-%u %s", i, + get_component_type_name(element->component_type), + element->register_index, element->register_index + element->register_count, + element->semantic_name); + if (element->semantic_index != -1) + vkd3d_string_buffer_printf(&buffer, "%u", element->semantic_index); + vkd3d_string_buffer_printf(&buffer, + " mask %#x used_mask %#x sysval %s min_precision %s interpolation %u stream %u", + element->mask, element->used_mask, get_sysval_semantic_name(element->sysval_semantic), + get_minimum_precision_name(element->min_precision), element->interpolation_mode, + element->stream_index); + if (element->target_location != -1) + vkd3d_string_buffer_printf(&buffer, " target %u", element->target_location); + else + vkd3d_string_buffer_printf(&buffer, " unused"); + + TRACE("%s\n", buffer.buffer); + } + + vkd3d_string_buffer_cleanup(&buffer); +} + +void vsir_program_trace(const struct vsir_program *program) +{ + const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; struct vkd3d_shader_code code; + const char *p, *q, *end; + + trace_signature(&program->input_signature, "Input"); + trace_signature(&program->output_signature, "Output"); + 
trace_signature(&program->patch_constant_signature, "Patch-constant"); - if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) + if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) return; end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index d05394c3ab7..ae8e864c179 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -104,6 +104,12 @@ enum vkd3d_sm1_resource_type VKD3D_SM1_RESOURCE_TEXTURE_3D = 0x4, }; +enum vkd3d_sm1_misc_register +{ + VKD3D_SM1_MISC_POSITION = 0x0, + VKD3D_SM1_MISC_FACE = 0x1, +}; + enum vkd3d_sm1_opcode { VKD3D_SM1_OP_NOP = 0x00, @@ -444,17 +450,36 @@ static uint32_t swizzle_from_sm1(uint32_t swizzle) shader_sm1_get_swizzle_component(swizzle, 3)); } +/* D3DBC doesn't have the concept of index count. All registers implicitly have + * exactly one index. However for some register types the index doesn't make + * sense, so we remove it. 
*/ +static unsigned int idx_count_from_reg_type(enum vkd3d_shader_register_type reg_type) +{ + switch (reg_type) + { + case VKD3DSPR_DEPTHOUT: + return 0; + + default: + return 1; + } +} + static void shader_sm1_parse_src_param(uint32_t param, struct vkd3d_shader_src_param *rel_addr, struct vkd3d_shader_src_param *src) { enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); + unsigned int idx_count = idx_count_from_reg_type(reg_type); - vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, 1); + vsir_register_init(&src->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; src->reg.non_uniform = false; - src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; - src->reg.idx[0].rel_addr = rel_addr; + if (idx_count == 1) + { + src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; + src->reg.idx[0].rel_addr = rel_addr; + } if (src->reg.type == VKD3DSPR_SAMPLER) src->reg.dimension = VSIR_DIMENSION_NONE; else if (src->reg.type == VKD3DSPR_DEPTHOUT) @@ -470,12 +495,16 @@ static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_p { enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); + unsigned int idx_count = idx_count_from_reg_type(reg_type); - vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, 1); + vsir_register_init(&dst->reg, reg_type, VKD3D_DATA_FLOAT, idx_count); dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; dst->reg.non_uniform = false; - dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; - dst->reg.idx[0].rel_addr = rel_addr; + if (idx_count == 1) + { + dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK; + dst->reg.idx[0].rel_addr = 
rel_addr; + } if (dst->reg.type == VKD3DSPR_SAMPLER) dst->reg.dimension = VSIR_DIMENSION_NONE; else if (dst->reg.type == VKD3DSPR_DEPTHOUT) @@ -532,6 +561,21 @@ static struct signature_element *find_signature_element_by_register_index( return NULL; } +/* Add missing bits to a mask to make it contiguous. */ +static unsigned int make_mask_contiguous(unsigned int mask) +{ + static const unsigned int table[] = + { + 0x0, 0x1, 0x2, 0x3, + 0x4, 0x7, 0x6, 0x7, + 0x8, 0xf, 0xe, 0xf, + 0xc, 0xf, 0xe, 0xf, + }; + + VKD3D_ASSERT(mask < ARRAY_SIZE(table)); + return table[mask]; +} + static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, unsigned int register_index, bool is_dcl, unsigned int mask) @@ -547,7 +591,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp if ((element = find_signature_element(signature, name, index))) { - element->mask |= mask; + element->mask = make_mask_contiguous(element->mask | mask); if (!is_dcl) element->used_mask |= mask; return true; @@ -567,7 +611,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp element->register_index = register_index; element->target_location = register_index; element->register_count = 1; - element->mask = mask; + element->mask = make_mask_contiguous(mask); element->used_mask = is_dcl ? 0 : mask; if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) element->interpolation_mode = VKD3DSIM_LINEAR; @@ -601,7 +645,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) { const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; - unsigned int register_index = reg->idx[0].offset; + unsigned int register_index = reg->idx_count > 0 ? 
reg->idx[0].offset : 0; switch (reg->type) { @@ -921,6 +965,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); } shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); + + if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + sm1->p.program->has_point_size = true; } static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, @@ -1272,7 +1319,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->end = &code[token_count]; /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + if (!vsir_program_init(program, compile_info, &version, + code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); @@ -1338,9 +1386,6 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) program->flat_constant_count[i] = get_external_constant_count(&sm1, i); - if (!sm1.p.failed) - ret = vkd3d_shader_parser_validate(&sm1.p, config_flags); - if (sm1.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; @@ -1351,10 +1396,21 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c return ret; } - return ret; + if ((ret = vkd3d_shader_parser_validate(&sm1.p, config_flags)) < 0) + { + WARN("Failed to validate shader after parsing, ret %d.\n", ret); + + if (TRACE_ON()) + vsir_program_trace(program); + + vsir_program_cleanup(program); + return ret; + } + + return VKD3D_OK; } -bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, +bool 
sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) { unsigned int i; @@ -1384,22 +1440,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, 
VKD3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, }; @@ -1422,33 +1478,33 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, return false; } -bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) +bool sm1_usage_from_semantic_name(const char *semantic_name, + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) { static const struct { const char *name; - D3DDECLUSAGE usage; + enum vkd3d_decl_usage usage; } semantics[] = { - {"binormal", D3DDECLUSAGE_BINORMAL}, - {"blendindices", D3DDECLUSAGE_BLENDINDICES}, - {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, - {"color", D3DDECLUSAGE_COLOR}, - {"depth", D3DDECLUSAGE_DEPTH}, - {"fog", D3DDECLUSAGE_FOG}, - {"normal", D3DDECLUSAGE_NORMAL}, - {"position", D3DDECLUSAGE_POSITION}, - {"positiont", D3DDECLUSAGE_POSITIONT}, - {"psize", D3DDECLUSAGE_PSIZE}, - {"sample", D3DDECLUSAGE_SAMPLE}, - {"sv_depth", D3DDECLUSAGE_DEPTH}, - {"sv_position", D3DDECLUSAGE_POSITION}, - {"sv_target", D3DDECLUSAGE_COLOR}, - {"tangent", D3DDECLUSAGE_TANGENT}, - {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, - {"texcoord", D3DDECLUSAGE_TEXCOORD}, + {"binormal", VKD3D_DECL_USAGE_BINORMAL}, + {"blendindices", VKD3D_DECL_USAGE_BLEND_INDICES}, + {"blendweight", 
VKD3D_DECL_USAGE_BLEND_WEIGHT}, + {"color", VKD3D_DECL_USAGE_COLOR}, + {"depth", VKD3D_DECL_USAGE_DEPTH}, + {"fog", VKD3D_DECL_USAGE_FOG}, + {"normal", VKD3D_DECL_USAGE_NORMAL}, + {"position", VKD3D_DECL_USAGE_POSITION}, + {"positiont", VKD3D_DECL_USAGE_POSITIONT}, + {"psize", VKD3D_DECL_USAGE_PSIZE}, + {"sample", VKD3D_DECL_USAGE_SAMPLE}, + {"sv_depth", VKD3D_DECL_USAGE_DEPTH}, + {"sv_position", VKD3D_DECL_USAGE_POSITION}, + {"sv_target", VKD3D_DECL_USAGE_COLOR}, + {"tangent", VKD3D_DECL_USAGE_TANGENT}, + {"tessfactor", VKD3D_DECL_USAGE_TESS_FACTOR}, + {"texcoord", VKD3D_DECL_USAGE_TEXCOORD}, }; unsigned int i; @@ -1468,21 +1524,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, struct d3dbc_compiler { + const struct vkd3d_sm1_opcode_info *opcode_table; struct vsir_program *program; struct vkd3d_bytecode_buffer buffer; struct vkd3d_shader_message_context *message_context; - - /* OBJECTIVE: Store all the required information in the other fields so - * that this hlsl_ctx is no longer necessary. */ - struct hlsl_ctx *ctx; + bool failed; }; static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) { - if (type == VKD3D_SHADER_TYPE_VERTEX) - return D3DVS_VERSION(major, minor); - else - return D3DPS_VERSION(major, minor); + return vkd3d_make_u32(vkd3d_make_u16(minor, major), + type == VKD3D_SHADER_TYPE_VERTEX ? 
VKD3D_SM1_VS : VKD3D_SM1_PS); } D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) @@ -1512,6 +1564,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: @@ -1617,6 +1670,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: @@ -1709,7 +1763,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset; unsigned int uniform_count = 0; struct hlsl_ir_var *var; @@ -1741,15 +1795,16 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff size_offset = put_u32(buffer, 0); ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ creator_offset = put_u32(buffer, 0); put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); put_u32(buffer, uniform_count); - put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + vars_offset = put_u32(buffer, 0); put_u32(buffer, 0); /* FIXME: flags */ put_u32(buffer, 0); /* FIXME: target string */ vars_start = bytecode_align(buffer); + set_u32(buffer, vars_offset, vars_start - ctab_start); LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -1825,8 +1880,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff switch (comp_type->e.numeric.type) { case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &var->loc, "Write double default values."); - uni.u = 0; + if (ctx->double_as_float_alias) + uni.u = var->default_values[k].number.u; + else + uni.u = 0; break; case HLSL_TYPE_INT: @@ -1860,24 +1917,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff set_u32(buffer, creator_offset, offset - ctab_start); ctab_end = bytecode_align(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); } static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) { - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); + return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) + | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); } struct sm1_instruction { - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + enum vkd3d_sm1_opcode opcode; unsigned int flags; struct sm1_dst_register { enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; + enum vkd3d_shader_dst_modifier mod; unsigned int writemask; uint32_t 
reg; } dst; @@ -1885,7 +1942,7 @@ struct sm1_instruction struct sm1_src_register { enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; + enum vkd3d_shader_src_modifier mod; unsigned int swizzle; uint32_t reg; } srcs[4]; @@ -1900,11 +1957,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) const struct sm1_dst_register *dst = &instr->dst; unsigned int i; - if (instr->opcode != D3DSIO_MOV) + if (instr->opcode != VKD3D_SM1_OP_MOV) return false; - if (dst->mod != D3DSPDM_NONE) + if (dst->mod != VKD3DSPDM_NONE) return false; - if (src->mod != D3DSPSM_NONE) + if (src->mod != VKD3DSPSM_NONE) return false; if (src->type != dst->type) return false; @@ -1923,13 +1980,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) { VKD3D_ASSERT(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(reg->type) + | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) + | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); } static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_src_register *reg) { - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(reg->type) + | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) + | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); } static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) @@ -1945,7 +2008,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); if (version->major 
> 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); if (instr->has_dst) @@ -1955,346 +2018,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s write_sm1_src_register(buffer, &instr->srcs[i]); }; -static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -{ - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -} - -static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - d3dbc_write_instruction(d3dbc, &instr); -} - -static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type 
= VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &instr); -} - -static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( + struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) { - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &instr); -} + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i = 0; -static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = + for (;;) { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - 
.srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; + info = &d3dbc->opcode_table[i++]; + if (info->vkd3d_opcode == VKD3DSIH_INVALID) + return NULL; - d3dbc_write_instruction(d3dbc, &instr); + if (vkd3d_opcode == info->vkd3d_opcode + && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) + && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) + || !info->max_version.major)) + return info; + } } -static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src, - D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +static uint32_t swizzle_from_vsir(uint32_t swizzle) { - struct sm1_instruction instr = - { - .opcode = opcode, + uint32_t x = vsir_swizzle_get_component(swizzle, 0); + uint32_t y = vsir_swizzle_get_component(swizzle, 1); + uint32_t z = vsir_swizzle_get_component(swizzle, 2); + uint32_t w = vsir_swizzle_get_component(swizzle, 3); - .dst.type = VKD3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, + if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) + ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, - .src_count = 1, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &instr); + return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) + | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) + | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) + | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); } -static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const 
struct vkd3d_shader_src_param *param, + struct sm1_src_register *src, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - struct hlsl_ctx *ctx = d3dbc->ctx; + src->mod = param->modifiers; + src->reg = param->reg.idx[0].offset; + src->type = param->reg.type; + src->swizzle = swizzle_from_vsir(param->swizzle); - /* Narrowing casts were already lowered. */ - VKD3D_ASSERT(src_type->dimx == dst_type->dimx); - - switch (dst_type->e.numeric.type) + if (param->reg.idx[0].rel_addr) { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - switch(src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not - * reach this case unless we are missing something. 
*/ - hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); - break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast to double."); - break; - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ - default: - hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", - debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); - break; + vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on source register."); + d3dbc->failed = true; } } -static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) +static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param, + struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc) { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct hlsl_ctx *ctx = d3dbc->ctx; - unsigned int i, x; + dst->mod = param->modifiers; + dst->reg = param->reg.idx[0].offset; + dst->type = param->reg.type; + dst->writemask = param->write_mask; - for (i = 0; i < ctx->constant_defs.count; ++i) + if (param->reg.idx[0].rel_addr) { - const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = constant_reg->index, - }; - - if (version->major > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - 
write_sm1_dst_register(buffer, ®); - for (x = 0; x < 4; ++x) - put_f32(buffer, constant_reg->value.f[x]); + vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on destination register."); + d3dbc->failed = true; } } -static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, - const struct signature_element *element, bool output) +static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; + uint32_t token; - if (hlsl_sm1_register_from_semantic(version, element->semantic_name, - element->semantic_index, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else + const struct sm1_dst_register reg = { - ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); - VKD3D_ASSERT(ret); - reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - reg.reg = element->register_index; - } + .type = VKD3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = ins->dst[0].reg.idx[0].offset, + }; - token = D3DSIO_DCL; + token = VKD3D_SM1_OP_DEF; if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; + token |= 5 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); - token = (1u << 31); - token |= usage << D3DSP_DCL_USAGE_SHIFT; - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - - reg.writemask = element->mask; write_sm1_dst_register(buffer, ®); + for (unsigned int x = 0; x < 4; ++x) + put_f32(buffer, ins->src[0].reg.u.immconst_f32[x]); } -static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) -{ - struct vsir_program *program = d3dbc->program; - const struct vkd3d_shader_version *version; - bool write_in = false, write_out = false; - - version = &program->shader_version; - if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) - write_in = true; - else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) - write_in = write_out = true; - else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) - write_in = true; - - if (write_in) - { - for (unsigned int i = 0; i < program->input_signature.element_count; ++i) - d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); - } - - if (write_out) - { - for (unsigned int i = 0; i < program->output_signature.element_count; ++i) - d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); - } -} - -static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, - unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, + unsigned int reg_id, enum vkd3d_sm1_resource_type res_type) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; struct 
sm1_dst_register reg = {0}; - uint32_t token, res_type = 0; + uint32_t token; - token = D3DSIO_DCL; + token = VKD3D_SM1_OP_DCL; if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; + token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); - switch (sampler_dim) - { - case HLSL_SAMPLER_DIM_2D: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; - break; - - case HLSL_SAMPLER_DIM_CUBE: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; - break; - - case HLSL_SAMPLER_DIM_3D: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; - break; - - default: - vkd3d_unreachable(); - break; - } - - token = (1u << 31); + token = VKD3D_SM1_INSTRUCTION_PARAMETER; token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); @@ -2305,618 +2134,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, write_sm1_dst_register(buffer, ®); } -static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) +static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ctx *ctx = d3dbc->ctx; - enum hlsl_sampler_dim sampler_dim; - unsigned int i, count, reg_id; - struct hlsl_ir_var *var; + const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + unsigned int reg_id; if (version->major < 2) return; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) - continue; - - count = var->bind_count[HLSL_REGSET_SAMPLERS]; - - for (i = 0; i < count; ++i) - { - if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - { - sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; - if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - /* These can appear in sm4-style combined sample instructions. 
*/ - hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); - continue; - } - - reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; - d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); - } - } - } -} - -static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - - VKD3D_ASSERT(instr->reg.allocated); - VKD3D_ASSERT(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); -} - -static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - unsigned int i; - - for (i = 0; i < instr->data_type->dimx; ++i) - { - struct hlsl_reg src = arg1->reg, dst = instr->reg; - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); - } -} - -static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, - const struct hlsl_reg *dst, const struct hlsl_reg *src) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_SINCOS, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .src_count = 1, - }; - - if 
(op == HLSL_OP1_COS_REDUCED) - VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); - else /* HLSL_OP1_SIN_REDUCED */ - VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); - - if (d3dbc->ctx->profile->major_version < 3) - { - instr.src_count = 3; - - instr.srcs[1].type = VKD3DSPR_CONST; - instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); - instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; - - instr.srcs[2].type = VKD3DSPR_CONST; - instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); - instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; - } - - d3dbc_write_instruction(d3dbc, &instr); -} - -static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; - struct hlsl_ctx *ctx = d3dbc->ctx; - - VKD3D_ASSERT(instr->reg.allocated); - - if (expr->op == HLSL_OP1_REINTERPRET) - { - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - return; - } - - if (expr->op == HLSL_OP1_CAST) - { - d3dbc_write_cast(d3dbc, instr); - return; - } + reg_id = semantic->resource.reg.reg.idx[0].offset; - if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) + if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER) { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE, + "dcl instruction with register type %u.", semantic->resource.reg.reg.type); + d3dbc->failed = true; return; } - switch (expr->op) + switch (semantic->resource_type) { - case HLSL_OP1_ABS: - d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSX: - d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSY: - d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_LOG2: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); - break; - - case HLSL_OP1_NEG: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D); break; - case HLSL_OP1_SAT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE); break; - case HLSL_OP1_RCP: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); - break; - - case HLSL_OP1_COS_REDUCED: - case HLSL_OP1_SIN_REDUCED: - d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); - break; - - case HLSL_OP2_ADD: - d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: - d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: - d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 
HLSL_OP2_MUL: - d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: - d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_OP2_LOGIC_AND: - d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_LOGIC_OR: - d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_SLT: - if (version->type == VKD3D_SHADER_TYPE_PIXEL) - hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); - d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP3_CMP: - if (version->type == VKD3D_SHADER_TYPE_VERTEX) - hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); - d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_DP2ADD: - d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_MAD: - d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_3D); break; default: - hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); - break; - } -} - -static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); - -static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_if *iff = hlsl_ir_if(instr); - const struct hlsl_ir_node 
*condition; - struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; - - condition = iff->condition.node; - VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); - - sm1_ifc = (struct sm1_instruction) - { - .opcode = D3DSIO_IFC, - .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[0].reg = condition->reg.id, - .srcs[0].mod = 0, - - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[1].reg = condition->reg.id, - .srcs[1].mod = D3DSPSM_NEG, - - .src_count = 2, - }; - d3dbc_write_instruction(d3dbc, &sm1_ifc); - d3dbc_write_block(d3dbc, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; - d3dbc_write_instruction(d3dbc, &sm1_else); - d3dbc_write_block(d3dbc, &iff->else_block); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE, + "dcl instruction with resource_type %u.", semantic->resource_type); + d3dbc->failed = true; + return; } - - sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; - d3dbc_write_instruction(d3dbc, &sm1_endif); } -static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( + struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + const struct vkd3d_sm1_opcode_info *info; - switch (jump->type) + if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) { - case HLSL_IR_JUMP_DISCARD_NEG: - { - struct hlsl_reg *reg = &jump->condition.node->reg; - - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_TEXKILL, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg->id, - 
.dst.writemask = reg->writemask, - .has_dst = 1, - }; - - d3dbc_write_instruction(d3dbc, &sm1_instr); - break; - } - - default: - hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "Opcode %#x not supported for shader profile.", ins->opcode); + d3dbc->failed = true; + return NULL; } -} - -static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_load *load = hlsl_ir_load(instr); - struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - - VKD3D_ASSERT(instr->reg.allocated); - - if (load->src.var->is_uniform) + if (ins->dst_count != info->dst_count) { - VKD3D_ASSERT(reg.allocated); - sm1_instr.srcs[0].type = VKD3DSPR_CONST; + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid destination count %u for vsir instruction %#x (expected %u).", + ins->dst_count, ins->opcode, info->dst_count); + d3dbc->failed = true; + return NULL; } - else if (load->src.var->is_input_semantic) + if (ins->src_count != info->src_count) { - if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, - load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - VKD3D_ASSERT(reg.allocated); - sm1_instr.srcs[0].type = VKD3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else - sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + 
vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid source count %u for vsir instruction %#x (expected %u).", + ins->src_count, ins->opcode, info->src_count); + d3dbc->failed = true; + return NULL; } - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); + return info; } -static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc, + const struct vkd3d_shader_instruction *ins) { - const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - struct hlsl_ir_node *coords = load->coords.node; - struct hlsl_ir_node *ddx = load->ddx.node; - struct hlsl_ir_node *ddy = load->ddy.node; - unsigned int sampler_offset, reg_id; - struct hlsl_ctx *ctx = d3dbc->ctx; - struct sm1_instruction sm1_instr; + struct sm1_instruction instr = {0}; + const struct vkd3d_sm1_opcode_info *info; - sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); - reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; + if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) + return; - sm1_instr = (struct sm1_instruction) - { - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, + instr.opcode = info->sm1_opcode; + instr.flags = ins->flags; + instr.has_dst = info->dst_count; + instr.src_count = info->src_count; - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = coords->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), + if (instr.has_dst) + sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location); + for (unsigned int i = 0; i < instr.src_count; ++i) + sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location); - .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, - .srcs[1].reg = 
reg_id, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + d3dbc_write_instruction(d3dbc, &instr); +} - .src_count = 2, - }; +static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) +{ + uint32_t writemask; - switch (load->load_type) + switch (ins->opcode) { - case HLSL_RESOURCE_SAMPLE: - sm1_instr.opcode = D3DSIO_TEX; + case VKD3DSIH_DEF: + d3dbc_write_vsir_def(d3dbc, ins); break; - case HLSL_RESOURCE_SAMPLE_PROJ: - sm1_instr.opcode = D3DSIO_TEX; - sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + case VKD3DSIH_DCL: + d3dbc_write_vsir_dcl(d3dbc, ins); break; - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - sm1_instr.opcode = D3DSIO_TEX; - sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + case VKD3DSIH_ABS: + case VKD3DSIH_ADD: + case VKD3DSIH_CMP: + case VKD3DSIH_DP2ADD: + case VKD3DSIH_DP3: + case VKD3DSIH_DP4: + case VKD3DSIH_DSX: + case VKD3DSIH_DSY: + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_FRC: + case VKD3DSIH_IFC: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_MOV: + case VKD3DSIH_MUL: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SLT: + case VKD3DSIH_TEX: + case VKD3DSIH_TEXKILL: + case VKD3DSIH_TEXLDD: + d3dbc_write_vsir_simple_instruction(d3dbc, ins); break; - case HLSL_RESOURCE_SAMPLE_GRAD: - sm1_instr.opcode = D3DSIO_TEXLDD; - - sm1_instr.srcs[2].type = VKD3DSPR_TEMP; - sm1_instr.srcs[2].reg = ddx->reg.id; - sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); - - sm1_instr.srcs[3].type = VKD3DSPR_TEMP; - sm1_instr.srcs[3].reg = ddy->reg.id; - sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); - - sm1_instr.src_count += 2; + case VKD3DSIH_EXP: + case VKD3DSIH_LOG: + case VKD3DSIH_RCP: + case VKD3DSIH_RSQ: + writemask = ins->dst->write_mask; + if (writemask != VKD3DSP_WRITEMASK_0 && writemask != VKD3DSP_WRITEMASK_1 + && 
writemask != VKD3DSP_WRITEMASK_2 && writemask != VKD3DSP_WRITEMASK_3) + { + vkd3d_shader_error(d3dbc->message_context, &ins->location, + VKD3D_SHADER_ERROR_D3DBC_INVALID_WRITEMASK, + "writemask %#x for vsir instruction with opcode %#x is not single component.", + writemask, ins->opcode); + d3dbc->failed = true; + } + d3dbc_write_vsir_simple_instruction(d3dbc, ins); break; default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "vsir instruction with opcode %#x.", ins->opcode); + d3dbc->failed = true; + break; } - - VKD3D_ASSERT(instr->reg.allocated); - - d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + const struct signature_element *element, bool output) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct hlsl_ir_store *store = hlsl_ir_store(instr); - struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + enum vkd3d_decl_usage usage; + uint32_t token, usage_idx; + bool ret; - if (store->lhs.var->is_output_semantic) + if (sm1_register_from_semantic_name(version, element->semantic_name, + element->semantic_index, output, ®.type, ®.reg)) { - if (version->type == 
VKD3D_SHADER_TYPE_PIXEL && version->major == 1) - { - sm1_instr.dst.type = VKD3DSPR_TEMP; - sm1_instr.dst.reg = 0; - } - else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, - store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - VKD3D_ASSERT(reg.allocated); - sm1_instr.dst.type = VKD3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + usage = 0; + usage_idx = 0; } else - VKD3D_ASSERT(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); -} - -static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; - struct sm1_instruction sm1_instr = { - .opcode = D3DSIO_MOV, + ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg = element->register_index; + } - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, + token = VKD3D_SM1_OP_DCL; + if (version->major > 1) + token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + put_u32(buffer, token); - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; + token = (1u << 31); + token |= usage << VKD3D_SM1_DCL_USAGE_SHIFT; + token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; + put_u32(buffer, token); - VKD3D_ASSERT(instr->reg.allocated); - VKD3D_ASSERT(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); + reg.writemask = element->mask; + write_sm1_dst_register(buffer, ®); } -static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) +static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) { - struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - d3dbc_write_constant(d3dbc, instr); - break; - - case HLSL_IR_EXPR: - d3dbc_write_expr(d3dbc, instr); - break; - - case HLSL_IR_IF: - if (hlsl_version_ge(ctx, 2, 1)) - d3dbc_write_if(d3dbc, instr); - else - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - break; - - case HLSL_IR_JUMP: - d3dbc_write_jump(d3dbc, instr); - break; + struct 
vsir_program *program = d3dbc->program; + const struct vkd3d_shader_version *version; + bool write_in = false, write_out = false; - case HLSL_IR_LOAD: - d3dbc_write_load(d3dbc, instr); - break; + version = &program->shader_version; + if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) + write_in = true; + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) + write_in = write_out = true; + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) + write_in = true; - case HLSL_IR_RESOURCE_LOAD: - d3dbc_write_resource_load(d3dbc, instr); - break; + if (write_in) + { + for (unsigned int i = 0; i < program->input_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); + } - case HLSL_IR_STORE: - d3dbc_write_store(d3dbc, instr); - break; + if (write_out) + { + for (unsigned int i = 0; i < program->output_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); + } +} - case HLSL_IR_SWIZZLE: - d3dbc_write_swizzle(d3dbc, instr); - break; +static void d3dbc_write_program_instructions(struct d3dbc_compiler *d3dbc) +{ + struct vsir_program *program = d3dbc->program; + unsigned int i; - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } + for (i = 0; i < program->instructions.count; ++i) + d3dbc_write_vsir_instruction(d3dbc, &program->instructions.elements[i]); } -/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving - * data from the other parameters instead, so it can be removed as an argument - * and be declared in vkd3d_shader_private.h and used without relying on HLSL - * IR structs. 
*/ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_version *version = &program->shader_version; struct d3dbc_compiler d3dbc = {0}; struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; + int result; - d3dbc.ctx = ctx; d3dbc.program = program; d3dbc.message_context = message_context; + switch (version->type) + { + case VKD3D_SHADER_TYPE_VERTEX: + d3dbc.opcode_table = vs_opcode_table; + break; + + case VKD3D_SHADER_TYPE_PIXEL: + d3dbc.opcode_table = ps_opcode_table; + break; + + default: + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE, + "Invalid shader type %u.", version->type); + return VKD3D_ERROR_INVALID_SHADER; + } put_u32(buffer, sm1_version(version->type, version->major, version->minor)); bytecode_put_bytes(buffer, ctab->code, ctab->size); - d3dbc_write_constant_defs(&d3dbc); d3dbc_write_semantic_dcls(&d3dbc); - d3dbc_write_sampler_dcls(&d3dbc); - d3dbc_write_block(&d3dbc, &entry_func->body); + d3dbc_write_program_instructions(&d3dbc); - put_u32(buffer, D3DSIO_END); + put_u32(buffer, VKD3D_SM1_OP_END); + result = VKD3D_OK; if (buffer->status) - ctx->result = buffer->status; + result = buffer->status; + if (d3dbc.failed) + result = VKD3D_ERROR_INVALID_SHADER; - if (!ctx->result) + if (!result) { out->code = buffer->data; out->size = buffer->size; @@ -2925,5 +2419,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, { vkd3d_free(buffer->data); } - return ctx->result; + return result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 184788dc57e..f6ac8e0829e 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -20,6 +20,19 @@ #include "vkd3d_shader_private.h" +#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 + +static void compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) +{ + const uint8_t *ptr = dxbc; + + VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); + ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; + size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; + + vkd3d_compute_md5(ptr, size, checksum, VKD3D_MD5_DXBC); +} + void dxbc_writer_init(struct dxbc_writer *dxbc) { memset(dxbc, 0, sizeof(*dxbc)); @@ -72,7 +85,7 @@ int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_ } set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + compute_dxbc_checksum(buffer.data, buffer.size, checksum); for (i = 0; i < 4; ++i) set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); @@ -188,7 +201,7 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ checksum[3] = read_u32(&ptr); if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) { - vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); + compute_dxbc_checksum(data, data_size, calculated_checksum); if (memcmp(checksum, calculated_checksum, sizeof(checksum))) { WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " @@ -406,8 +419,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s const char *name; uint32_t mask; - e[i].sort_index = i; - if (has_stream_index) e[i].stream_index = read_u32(&ptr); else @@ -1488,7 +1499,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro dxbc->code = context.buffer.data; dxbc->size = total_size; - vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); + compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); for (i = 0; i < 4; ++i) set_u32(&context.buffer, (i + 1) * 
sizeof(uint32_t), checksum[i]); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 4a17c62292b..f9f44f34bcf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3888,7 +3888,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade if (is_control_point) { if (reg_type == VKD3DSPR_OUTPUT) - param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions); + param->reg.idx[count].rel_addr = vsir_program_create_outpointid_param(sm6->p.program); param->reg.idx[count++].offset = 0; } @@ -4161,8 +4161,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ dst_param_init(&dst_params[0]); dst_params[1].reg = ptr->u.reg; - /* The groupshared register has data type UAV when accessed. */ - dst_params[1].reg.data_type = VKD3D_DATA_UAV; + dst_params[1].reg.data_type = VKD3D_DATA_UNUSED; dst_params[1].reg.idx[1].rel_addr = NULL; dst_params[1].reg.idx[1].offset = ~0u; dst_params[1].reg.idx_count = 1; @@ -4175,6 +4174,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty const struct sm6_type *type_b, struct sm6_parser *sm6) { bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); + bool is_double = sm6_type_is_double(type_a); bool is_bool = sm6_type_is_bool(type_a); enum vkd3d_shader_opcode op; bool is_valid; @@ -4199,7 +4199,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty case BINOP_ADD: case BINOP_SUB: /* NEG is applied later for subtraction. */ - op = is_int ? VKD3DSIH_IADD : VKD3DSIH_ADD; + op = is_int ? VKD3DSIH_IADD : (is_double ? VKD3DSIH_DADD : VKD3DSIH_ADD); is_valid = !is_bool; break; case BINOP_AND: @@ -4215,7 +4215,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty is_valid = is_int && !is_bool; break; case BINOP_MUL: - op = is_int ? VKD3DSIH_UMUL : VKD3DSIH_MUL; + op = is_int ? 
VKD3DSIH_UMUL : (is_double ? VKD3DSIH_DMUL : VKD3DSIH_MUL); is_valid = !is_bool; break; case BINOP_OR: @@ -4223,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty is_valid = is_int; break; case BINOP_SDIV: - op = is_int ? VKD3DSIH_IDIV : VKD3DSIH_DIV; + op = is_int ? VKD3DSIH_IDIV : (is_double ? VKD3DSIH_DDIV : VKD3DSIH_DIV); is_valid = !is_bool; break; case BINOP_SREM: @@ -4865,8 +4865,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); + /* Differently from other descriptors, constant buffers require an + * additional index, used to index within the constant buffer itself. */ + src_param->reg.idx_count = 3; register_index_address_init(&src_param->reg.idx[2], operands[1], sm6); - VKD3D_ASSERT(src_param->reg.idx_count == 3); type = sm6_type_get_scalar_type(dst->type, 0); VKD3D_ASSERT(type); @@ -4965,8 +4967,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int dst->u.handle.d = d; reg = &dst->u.handle.reg; - /* Set idx_count to 3 for use with load/store instructions. 
*/ - vsir_register_init(reg, d->reg_type, d->reg_data_type, 3); + vsir_register_init(reg, d->reg_type, d->reg_data_type, 2); reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = id; register_index_address_init(®->idx[1], operands[2], sm6); @@ -5871,6 +5872,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr return; } e = &signature->elements[row_index]; + if (!e->sysval_semantic) + column_index += vsir_write_mask_get_component_idx(e->mask); if (column_index >= VKD3D_VEC4_SIZE) { @@ -6861,7 +6864,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr, *cmp, *new; - const struct sm6_type *type; unsigned int i = 0; bool is_volatile; uint64_t code; @@ -6887,9 +6889,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re return; } - type = ptr->type->u.pointer.type; - cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); - new = sm6_parser_get_value_by_ref(sm6, record, type, &i); + /* Forward-referenced comparands are stored as value/type pairs, even + * though in principle we could use the destination type. 
*/ + cmp = sm6_parser_get_value_by_ref(sm6, record, NULL, &i); + new = sm6_parser_get_value_by_ref(sm6, record, ptr->type->u.pointer.type, &i); if (!cmp || !new) return; @@ -7287,7 +7290,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco unsigned int i = 0, alignment, operand_count; struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; - const struct sm6_type *pointee_type; const struct sm6_value *ptr, *src; uint64_t alignment_code; @@ -7299,13 +7301,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco return; } - pointee_type = ptr->type->u.pointer.type; - if (!(src = sm6_parser_get_value_by_ref(sm6, record, pointee_type, &i))) + /* Forward-referenced sources are stored as value/type pairs, even + * though in principle we could use the destination type. */ + if (!(src = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) return; if (!sm6_value_validate_is_numeric(src, sm6)) return; - if (pointee_type != src->type) + if (ptr->type->u.pointer.type != src->type) { WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, @@ -8908,7 +8911,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, d->resource_type = ins->resource_type; d->kind = kind; d->reg_type = VKD3DSPR_RESOURCE; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; + d->reg_data_type = VKD3D_DATA_UNUSED; d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; @@ -8982,7 +8985,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, d->resource_type = ins->resource_type; d->kind = values[0]; d->reg_type = VKD3DSPR_UAV; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? 
VKD3D_DATA_UINT : VKD3D_DATA_UAV; + d->reg_data_type = VKD3D_DATA_UNUSED; d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; @@ -9346,7 +9349,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Signature element is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element is not a metadata node."); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } element_node = m->u.node; @@ -9355,7 +9358,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Invalid operand count %u.\n", element_node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Invalid signature element operand count %u.", element_node->operand_count); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } if (element_node->operand_count > 11) { @@ -9374,7 +9377,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Failed to load uint value at index %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element value at index %u is not an integer.", j); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } } @@ -9385,7 +9388,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const FIXME("Unsupported element id %u not equal to its index %u.\n", values[0], i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A non-sequential and non-zero-based element id is not supported."); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } if (!sm6_metadata_value_is_string(element_node->operands[1])) @@ -9393,7 +9396,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Element name is not a string.\n"); vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element name is not a metadata string."); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } e->semantic_name = element_node->operands[1]->u.string_value; @@ -9407,7 +9410,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled.", j); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } if ((e->interpolation_mode = values[5]) >= VKD3DSIM_COUNT) @@ -9415,7 +9418,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled interpolation mode %u.\n", e->interpolation_mode); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Interpolation mode %u is unhandled.", e->interpolation_mode); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } e->register_count = values[6]; @@ -9430,7 +9433,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled for an I/O register.", j); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } } else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) @@ -9439,7 +9442,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting row of %u with count %u is invalid.", e->register_index, e->register_count); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } index = values[9]; @@ -9448,7 +9451,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Invalid column start %u with count %u.\n", index, column_count); 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting column %u with count %u is invalid.", index, column_count); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } e->mask = vkd3d_write_mask_from_component_count(column_count); @@ -9471,7 +9474,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Semantic index list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element semantic index list is not a metadata node."); - return VKD3D_ERROR_INVALID_SHADER; + goto invalid; } element_node = m->u.node; @@ -9516,6 +9519,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const s->element_count = operand_count; return VKD3D_OK; + +invalid: + vkd3d_free(elements); + return VKD3D_ERROR_INVALID_SHADER; } static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, @@ -9633,6 +9640,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co ins->declaration.thread_group_size.x = group_sizes[0]; ins->declaration.thread_group_size.y = group_sizes[1]; ins->declaration.thread_group_size.z = group_sizes[2]; + sm6->p.program->thread_group_size = ins->declaration.thread_group_size; return VKD3D_OK; } @@ -10303,12 +10311,28 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. 
*/ count = max(token_count, 400) - 400; - if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) + if (!vsir_program_init(program, compile_info, &version, + (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; sm6->bitpos = 2; + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: + break; + + default: + if (program->patch_constant_signature.element_count != 0) + { + WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); + shader_signature_cleanup(&program->patch_constant_signature); + } + break; + } + input_signature = &program->input_signature; output_signature = &program->output_signature; patch_constant_signature = &program->patch_constant_signature; @@ -10526,9 +10550,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro dxil_block_destroy(&sm6->root_block); + if (sm6->p.failed) + { + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + return VKD3D_OK; fail: + sm6_parser_cleanup(sm6); vsir_program_cleanup(program); return ret; } @@ -10570,18 +10601,25 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); - if (!sm6.p.failed && ret >= 0) - ret = vkd3d_shader_parser_validate(&sm6.p, config_flags); - - if (sm6.p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; - - sm6_parser_cleanup(&sm6); if (ret < 0) { WARN("Failed to parse shader.\n"); return ret; } - return ret; + if ((ret = vkd3d_shader_parser_validate(&sm6.p, config_flags)) < 0) + { + WARN("Failed to validate shader after parsing, ret %d.\n", ret); + + if (TRACE_ON()) + vsir_program_trace(program); + + sm6_parser_cleanup(&sm6); + vsir_program_cleanup(program); + return ret; + } + + 
sm6_parser_cleanup(&sm6); + + return VKD3D_OK; } diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index a1d1fd6572f..f3f7a2c765c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); } +struct fx_4_binary_type +{ + uint32_t name; + uint32_t class; + uint32_t element_count; + uint32_t unpacked_size; + uint32_t stride; + uint32_t packed_size; + uint32_t typeinfo; +}; + struct string_entry { struct rb_entry entry; @@ -38,6 +49,7 @@ struct type_entry struct list entry; const char *name; uint32_t elements_count; + uint32_t modifiers; uint32_t offset; }; @@ -181,6 +193,7 @@ struct fx_write_context struct vkd3d_bytecode_buffer unstructured; struct vkd3d_bytecode_buffer structured; + struct vkd3d_bytecode_buffer objects; struct rb_tree strings; struct list types; @@ -223,11 +236,6 @@ static void set_status(struct fx_write_context *fx, int status) fx->status = status; } -static bool has_annotations(const struct hlsl_ir_var *var) -{ - return var->annotations && !list_empty(&var->annotations->vars); -} - static uint32_t write_string(const char *string, struct fx_write_context *fx) { return fx->ops->write_string(string, fx); @@ -278,9 +286,9 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { + unsigned int elements_count, modifiers; const struct hlsl_type *element_type; struct type_entry *type_entry; - unsigned int elements_count; const char *name; VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); @@ -297,6 +305,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context } name = get_fx_4_type_name(element_type); + modifiers = element_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK; 
LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) { @@ -306,6 +315,9 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context if (type_entry->elements_count != elements_count) continue; + if (type_entry->modifiers != modifiers) + continue; + return type_entry->offset; } @@ -315,6 +327,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context type_entry->offset = write_fx_4_type(type, fx); type_entry->name = name; type_entry->elements_count = elements_count; + type_entry->modifiers = modifiers; list_add_tail(&fx->types, &type_entry->entry); @@ -429,17 +442,26 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx write_fx_4_state_block(var, 0, count_offset, fx); } +static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offset, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t count; + + count = write_annotations(var->annotations, fx); + set_u32(buffer, count_offset, count); +} + static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t name_offset; + uint32_t name_offset, annotation_count_offset; name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); - put_u32(buffer, 0); /* Annotation count. */ + annotation_count_offset = put_u32(buffer, 0); put_u32(buffer, 0); /* Assignment count. 
*/ - /* TODO: annotations */ + write_fx_2_annotations(var, annotation_count_offset, fx); /* TODO: assignments */ if (var->state_block_count && var->state_blocks[0]->count) @@ -459,25 +481,48 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; } -static const uint32_t fx_4_numeric_base_type[] = +enum fx_4_type_constants { - [HLSL_TYPE_FLOAT] = 1, - [HLSL_TYPE_INT ] = 2, - [HLSL_TYPE_UINT ] = 3, - [HLSL_TYPE_BOOL ] = 4, + /* Numeric types encoding */ + FX_4_NUMERIC_TYPE_FLOAT = 1, + FX_4_NUMERIC_TYPE_INT = 2, + FX_4_NUMERIC_TYPE_UINT = 3, + FX_4_NUMERIC_TYPE_BOOL = 4, + + FX_4_NUMERIC_CLASS_SCALAR = 1, + FX_4_NUMERIC_CLASS_VECTOR = 2, + FX_4_NUMERIC_CLASS_MATRIX = 3, + + FX_4_NUMERIC_BASE_TYPE_SHIFT = 3, + FX_4_NUMERIC_ROWS_SHIFT = 8, + FX_4_NUMERIC_COLUMNS_SHIFT = 11, + FX_4_NUMERIC_COLUMN_MAJOR_MASK = 0x4000, + + /* Object types */ + FX_4_OBJECT_TYPE_STRING = 1, + + /* Types */ + FX_4_TYPE_CLASS_NUMERIC = 1, + FX_4_TYPE_CLASS_OBJECT = 2, + FX_4_TYPE_CLASS_STRUCT = 3, +}; + +static const uint32_t fx_4_numeric_base_types[] = +{ + [HLSL_TYPE_HALF ] = FX_4_NUMERIC_TYPE_FLOAT, + [HLSL_TYPE_FLOAT] = FX_4_NUMERIC_TYPE_FLOAT, + [HLSL_TYPE_INT ] = FX_4_NUMERIC_TYPE_INT, + [HLSL_TYPE_UINT ] = FX_4_NUMERIC_TYPE_UINT, + [HLSL_TYPE_BOOL ] = FX_4_NUMERIC_TYPE_BOOL, }; static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) { - static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; - static const unsigned int NUMERIC_ROWS_SHIFT = 8; - static const unsigned int NUMERIC_COLUMNS_SHIFT = 11; - static const unsigned int NUMERIC_COLUMN_MAJOR_MASK = 0x4000; static const uint32_t numeric_type_class[] = { - [HLSL_CLASS_SCALAR] = 1, - [HLSL_CLASS_VECTOR] = 2, - [HLSL_CLASS_MATRIX] = 3, + [HLSL_CLASS_SCALAR] = FX_4_NUMERIC_CLASS_SCALAR, + [HLSL_CLASS_VECTOR] = FX_4_NUMERIC_CLASS_VECTOR, + [HLSL_CLASS_MATRIX] = FX_4_NUMERIC_CLASS_MATRIX, }; struct 
hlsl_ctx *ctx = fx->ctx; uint32_t value = 0; @@ -497,20 +542,21 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); + value |= (fx_4_numeric_base_types[type->e.numeric.type] << FX_4_NUMERIC_BASE_TYPE_SHIFT); break; default: hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); return 0; } - value |= (type->dimy & 0x7) << NUMERIC_ROWS_SHIFT; - value |= (type->dimx & 0x7) << NUMERIC_COLUMNS_SHIFT; + value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; + value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - value |= NUMERIC_COLUMN_MAJOR_MASK; + value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK; return value; } @@ -564,17 +610,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_VERTEX_SHADER: return "VertexShader"; + case HLSL_CLASS_GEOMETRY_SHADER: + return "GeometryShader"; + case HLSL_CLASS_PIXEL_SHADER: return "PixelShader"; case HLSL_CLASS_STRING: return "String"; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + if (type->e.numeric.type == HLSL_TYPE_HALF) + return "float"; + /* fall-through */ default: return type->name; } } +static bool is_numeric_fx_4_type(const struct hlsl_type *type) +{ + type = hlsl_get_multiarray_element_type(type); + return type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type); +} + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) { struct field_offsets @@ -584,48 +645,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co uint32_t offset; uint32_t type; }; - uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; + uint32_t 
name_offset, offset, unpacked_size, packed_size, stride, numeric_desc; struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; struct field_offsets *field_offsets = NULL; + const struct hlsl_type *element_type; struct hlsl_ctx *ctx = fx->ctx; uint32_t elements_count = 0; const char *name; size_t i; - /* Resolve arrays to element type and number of elements. */ if (type->class == HLSL_CLASS_ARRAY) - { elements_count = hlsl_get_multiarray_size(type); - type = hlsl_get_multiarray_element_type(type); - } + element_type = hlsl_get_multiarray_element_type(type); - name = get_fx_4_type_name(type); + name = get_fx_4_type_name(element_type); name_offset = write_string(name, fx); - if (type->class == HLSL_CLASS_STRUCT) + if (element_type->class == HLSL_CLASS_STRUCT) { - if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) + if (!(field_offsets = hlsl_calloc(ctx, element_type->e.record.field_count, sizeof(*field_offsets)))) return 0; - for (i = 0; i < type->e.record.field_count; ++i) + for (i = 0; i < element_type->e.record.field_count; ++i) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; + const struct hlsl_struct_field *field = &element_type->e.record.fields[i]; field_offsets[i].name = write_string(field->name, fx); field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); - field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; + field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float); field_offsets[i].type = write_type(field->type, fx); } } offset = put_u32_unaligned(buffer, name_offset); - switch (type->class) + switch (element_type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: - put_u32_unaligned(buffer, 1); + put_u32_unaligned(buffer, FX_4_TYPE_CLASS_NUMERIC); break; case HLSL_CLASS_DEPTH_STENCIL_STATE: @@ -643,15 +702,16 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case 
HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: case HLSL_CLASS_STRING: - put_u32_unaligned(buffer, 2); + put_u32_unaligned(buffer, FX_4_TYPE_CLASS_OBJECT); break; case HLSL_CLASS_STRUCT: - put_u32_unaligned(buffer, 3); + put_u32_unaligned(buffer, FX_4_TYPE_CLASS_STRUCT); break; case HLSL_CLASS_ARRAY: case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: @@ -659,32 +719,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co vkd3d_unreachable(); case HLSL_CLASS_VOID: - FIXME("Writing type class %u is not implemented.\n", type->class); + FIXME("Writing type class %u is not implemented.\n", element_type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); return 0; } /* Structures can only contain numeric fields, this is validated during variable declaration. */ - total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + unpacked_size = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + packed_size = 0; - if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) - packed_size = hlsl_type_component_count(type) * sizeof(float); + if (is_numeric_fx_4_type(element_type)) + packed_size = hlsl_type_component_count(element_type) * sizeof(float); if (elements_count) - { - total_size *= elements_count; packed_size *= elements_count; - } + + stride = element_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); stride = align(stride, 4 * sizeof(float)); put_u32_unaligned(buffer, elements_count); - put_u32_unaligned(buffer, total_size); + put_u32_unaligned(buffer, unpacked_size); put_u32_unaligned(buffer, stride); put_u32_unaligned(buffer, packed_size); - if (type->class == HLSL_CLASS_STRUCT) + if (element_type->class == HLSL_CLASS_STRUCT) { - put_u32_unaligned(buffer, type->e.record.field_count); - for (i = 0; i < type->e.record.field_count; ++i) + put_u32_unaligned(buffer, element_type->e.record.field_count); + for (i = 0; 
i < element_type->e.record.field_count; ++i) { const struct field_offsets *field = &field_offsets[i]; @@ -700,7 +760,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, 0); /* Interface count */ } } - else if (type->class == HLSL_CLASS_TEXTURE) + else if (element_type->class == HLSL_CLASS_TEXTURE) { static const uint32_t texture_type[] = { @@ -716,13 +776,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, }; - put_u32_unaligned(buffer, texture_type[type->sampler_dim]); + put_u32_unaligned(buffer, texture_type[element_type->sampler_dim]); } - else if (type->class == HLSL_CLASS_SAMPLER) + else if (element_type->class == HLSL_CLASS_SAMPLER) { put_u32_unaligned(buffer, 21); } - else if (type->class == HLSL_CLASS_UAV) + else if (element_type->class == HLSL_CLASS_UAV) { static const uint32_t uav_type[] = { @@ -735,60 +795,60 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, }; - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); + put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); } - else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) + else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) { put_u32_unaligned(buffer, 20); } - else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) + else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) { put_u32_unaligned(buffer, 19); } - else if (type->class == HLSL_CLASS_PIXEL_SHADER) + else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) { put_u32_unaligned(buffer, 5); } - else if (type->class == HLSL_CLASS_VERTEX_SHADER) + else if (element_type->class == HLSL_CLASS_VERTEX_SHADER) { put_u32_unaligned(buffer, 6); } - else if (type->class == HLSL_CLASS_RASTERIZER_STATE) + else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) { put_u32_unaligned(buffer, 4); } - else if (type->class == 
HLSL_CLASS_DEPTH_STENCIL_STATE) + else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) { put_u32_unaligned(buffer, 3); } - else if (type->class == HLSL_CLASS_BLEND_STATE) + else if (element_type->class == HLSL_CLASS_BLEND_STATE) { put_u32_unaligned(buffer, 2); } - else if (type->class == HLSL_CLASS_STRING) + else if (element_type->class == HLSL_CLASS_STRING) { - put_u32_unaligned(buffer, 1); + put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_STRING); } - else if (hlsl_is_numeric_type(type)) + else if (hlsl_is_numeric_type(element_type)) { - numeric_desc = get_fx_4_numeric_type_description(type, fx); + numeric_desc = get_fx_4_numeric_type_description(element_type, fx); put_u32_unaligned(buffer, numeric_desc); } - else if (type->class == HLSL_CLASS_COMPUTE_SHADER) + else if (element_type->class == HLSL_CLASS_COMPUTE_SHADER) { put_u32_unaligned(buffer, 28); } - else if (type->class == HLSL_CLASS_HULL_SHADER) + else if (element_type->class == HLSL_CLASS_HULL_SHADER) { put_u32_unaligned(buffer, 29); } - else if (type->class == HLSL_CLASS_DOMAIN_SHADER) + else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER) { put_u32_unaligned(buffer, 30); } else { - FIXME("Type %u is not supported.\n", type->class); + FIXME("Type %u is not supported.\n", element_type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); } @@ -963,16 +1023,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_context *fx) { + uint32_t name_offset, pass_count_offset, annotation_count_offset, count = 0; struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t name_offset, count_offset, count = 0; struct hlsl_ir_var *pass; name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); - put_u32(buffer, 0); /* Annotation count. */ - count_offset = put_u32(buffer, 0); /* Pass count. 
*/ + annotation_count_offset = put_u32(buffer, 0); + pass_count_offset = put_u32(buffer, 0); - /* FIXME: annotations */ + write_fx_2_annotations(var, annotation_count_offset, fx); LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) { @@ -980,47 +1040,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex ++count; } - set_u32(buffer, count_offset, count); + set_u32(buffer, pass_count_offset, count); } -static uint32_t get_fx_2_type_size(const struct hlsl_type *type) +static uint32_t write_fx_2_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, + struct fx_write_context *fx) { - uint32_t size = 0, elements_count; - size_t i; + const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); + uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t offset = buffer->size; + unsigned int comp_count; - if (type->class == HLSL_CLASS_ARRAY) + if (!value) + return 0; + + comp_count = hlsl_type_component_count(type); + + for (i = 0; i < elements_count; ++i) { - elements_count = hlsl_get_multiarray_size(type); - type = hlsl_get_multiarray_element_type(type); - return get_fx_2_type_size(type) * elements_count; + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + { + switch (type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + + for (j = 0; j < comp_count; ++j) + { + put_u32(buffer, value->number.u); + value++; + } + break; + default: + hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", + type->e.numeric.type); + } + + break; + } + case HLSL_CLASS_STRUCT: + { + struct hlsl_struct_field *fields = type->e.record.fields; + + for (j = 0; j < type->e.record.field_count; ++j) + { + 
write_fx_2_default_value(fields[i].type, value, fx); + value += hlsl_type_component_count(fields[i].type); + } + break; + } + default: + hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); + } } - else if (type->class == HLSL_CLASS_STRUCT) + + return offset; +} + +static uint32_t write_fx_2_object_initializer(const struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + unsigned int i, elements_count = hlsl_get_multiarray_size(var->data_type); + struct vkd3d_bytecode_buffer *buffer = &fx->objects; + uint32_t offset = fx->unstructured.size, id, size; + struct hlsl_ctx *ctx = fx->ctx; + const void *data; + + for (i = 0; i < elements_count; ++i) { - for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_SAMPLER) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - size += get_fx_2_type_size(field->type); + hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 sampler objects initializers is not implemented."); } + else + { + switch (type->class) + { + case HLSL_CLASS_STRING: + { + const char *string = var->default_values[i].string ? 
var->default_values[i].string : ""; + size = strlen(string) + 1; + data = string; + break; + } + case HLSL_CLASS_TEXTURE: + size = 0; + break; + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_VERTEX_SHADER: + size = 0; + hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 shader objects initializers is not implemented."); + break; + default: + vkd3d_unreachable(); + } + id = fx->object_variable_count++; - return size; + put_u32(&fx->unstructured, id); + + put_u32(buffer, id); + put_u32(buffer, size); + if (size) + bytecode_put_bytes(buffer, data, size); + } } - return type->dimx * type->dimy * sizeof(float); + return offset; } static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - const struct hlsl_type *type = var->data_type; - uint32_t offset, size, elements_count = 1; - - size = get_fx_2_type_size(type); - - if (type->class == HLSL_CLASS_ARRAY) - { - elements_count = hlsl_get_multiarray_size(type); - type = hlsl_get_multiarray_element_type(type); - } + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct hlsl_ctx *ctx = fx->ctx; + uint32_t offset; /* Note that struct fields must all be numeric; * this was validated in check_invalid_object_fields(). 
*/ @@ -1030,21 +1171,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: - /* FIXME: write actual initial value */ - if (var->default_values) - hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); - - offset = put_u32(buffer, 0); + offset = write_fx_2_default_value(var->data_type, var->default_values, fx); + break; - for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) - put_u32(buffer, 0); + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_STRING: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_VERTEX_SHADER: + offset = write_fx_2_object_initializer(var, fx); break; default: - /* Objects are given sequential ids. */ - offset = put_u32(buffer, fx->object_variable_count++); - for (uint32_t i = 1; i < elements_count; ++i) - put_u32(buffer, fx->object_variable_count++); + offset = 0; + hlsl_fixme(ctx, &var->loc, "Writing initializer not implemented for parameter class %#x.", type->class); break; } @@ -1070,6 +1210,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type return is_type_supported_fx_2(ctx, type->e.array.type, loc); case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_SAMPLER: switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -1083,9 +1224,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type } break; - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: + return true; + + case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_VERTEX_SHADER: hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); return false; @@ -1104,6 +1246,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type return false; case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: @@ -1117,8 +1260,8 @@ static bool is_type_supported_fx_2(struct 
hlsl_ctx *ctx, const struct hlsl_type static void write_fx_2_parameters(struct fx_write_context *fx) { + uint32_t desc_offset, value_offset, flags, annotation_count_offset; struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t desc_offset, value_offset, flags; struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_var *var; enum fx_2_parameter_flags @@ -1138,23 +1281,35 @@ static void write_fx_2_parameters(struct fx_write_context *fx) if (var->storage_modifiers & HLSL_STORAGE_SHARED) flags |= IS_SHARED; - put_u32(buffer, desc_offset); /* Parameter description */ - put_u32(buffer, value_offset); /* Value */ - put_u32(buffer, flags); /* Flags */ + put_u32(buffer, desc_offset); + put_u32(buffer, value_offset); + put_u32(buffer, flags); - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) - hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); + annotation_count_offset = put_u32(buffer, 0); + write_fx_2_annotations(var, annotation_count_offset, fx); ++fx->parameter_count; } } +static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t desc_offset, value_offset; + + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + value_offset = write_fx_2_initial_value(var, fx); + + put_u32(buffer, desc_offset); + put_u32(buffer, value_offset); +} + static const struct fx_write_context_ops fx_2_ops = { .write_string = write_fx_2_string, .write_technique = write_fx_2_technique, .write_pass = write_fx_2_pass, + .write_annotation = write_fx_2_annotation, }; static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) @@ -1180,19 +1335,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) object_count = put_u32(structured, 0); write_fx_2_parameters(&fx); - set_u32(structured, parameter_count, fx.parameter_count); - set_u32(structured, 
object_count, fx.object_variable_count); - write_techniques(ctx->globals, &fx); - set_u32(structured, technique_count, fx.technique_count); - set_u32(structured, shader_count, fx.shader_count); - - put_u32(structured, 0); /* String count */ + put_u32(structured, fx.object_variable_count - 1); put_u32(structured, 0); /* Resource count */ - /* TODO: strings */ + bytecode_put_bytes(structured, fx.objects.data, fx.objects.size); /* TODO: resources */ + set_u32(structured, parameter_count, fx.parameter_count); + set_u32(structured, object_count, fx.object_variable_count); + set_u32(structured, technique_count, fx.technique_count); + set_u32(structured, shader_count, fx.shader_count); + size = align(fx.unstructured.size, 4); set_u32(&buffer, offset, size); @@ -1201,6 +1355,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); + vkd3d_free(fx.objects.data); if (!fx.technique_count) hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); @@ -1252,6 +1407,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: @@ -1420,7 +1576,7 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - type = fx_4_numeric_base_type[data_type->e.numeric.type]; + type = fx_4_numeric_base_types[data_type->e.numeric.type]; break; default: type = 0; @@ -1438,11 +1594,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, struct fx_write_context *fx) { - uint32_t value_offset = 0, assignment_type = 0, rhs_offset; - uint32_t type_offset; + uint32_t value_offset = 0, 
assignment_type = 0, rhs_offset, type_offset, offset; + struct vkd3d_bytecode_buffer *unstructured = &fx->unstructured; struct vkd3d_bytecode_buffer *buffer = &fx->structured; - struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_node *value = entry->args->node; + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *index_var; + struct hlsl_ir_constant *c; + struct hlsl_ir_load *load; put_u32(buffer, entry->name_id); put_u32(buffer, entry->lhs_index); @@ -1453,7 +1612,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl { case HLSL_IR_CONSTANT: { - struct hlsl_ir_constant *c = hlsl_ir_constant(value); + c = hlsl_ir_constant(value); value_offset = write_fx_4_state_numeric_value(c, fx); assignment_type = 1; @@ -1461,15 +1620,71 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl } case HLSL_IR_LOAD: { - struct hlsl_ir_load *l = hlsl_ir_load(value); + load = hlsl_ir_load(value); - if (l->src.path_len) + if (load->src.path_len) hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); - value_offset = write_fx_4_string(l->src.var->name, fx); + value_offset = write_fx_4_string(load->src.var->name, fx); assignment_type = 2; break; } + case HLSL_IR_INDEX: + { + struct hlsl_ir_index *index = hlsl_ir_index(value); + struct hlsl_ir_node *val = index->val.node; + struct hlsl_ir_node *idx = index->idx.node; + struct hlsl_type *type; + + if (val->type != HLSL_IR_LOAD) + { + hlsl_fixme(ctx, &var->loc, "Unexpected indexed RHS value type."); + break; + } + + load = hlsl_ir_load(val); + value_offset = write_fx_4_string(load->src.var->name, fx); + type = load->src.var->data_type; + + switch (idx->type) + { + case HLSL_IR_CONSTANT: + { + c = hlsl_ir_constant(idx); + value_offset = put_u32(unstructured, value_offset); + put_u32(unstructured, c->value.u[0].u); + assignment_type = 3; + + if (c->value.u[0].u >= type->e.array.elements_count) + hlsl_error(ctx, &var->loc, 
VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Array index %u exceeds array size %u.", c->value.u[0].u, type->e.array.elements_count); + break; + } + + case HLSL_IR_LOAD: + { + load = hlsl_ir_load(idx); + index_var = load->src.var; + + /* Special case for uint index variables, for anything more complex use an expression. */ + if (hlsl_types_are_equal(index_var->data_type, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT)) + && !load->src.path_len) + { + offset = write_fx_4_string(index_var->name, fx); + + value_offset = put_u32(unstructured, value_offset); + put_u32(unstructured, offset); + assignment_type = 4; + break; + } + } + /* fall through */ + + default: + hlsl_fixme(ctx, &var->loc, "Complex array index expressions in RHS values are not implemented."); + } + break; + } default: hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); } @@ -2118,7 +2333,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, const struct function_component *comp = &components[i]; unsigned int arg_index = (i + 1) % entry->args_count; block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, - comp->lhs_has_index, comp->lhs_index, arg_index); + comp->lhs_has_index, comp->lhs_index, true, arg_index); } hlsl_free_state_block_entry(entry); @@ -2126,7 +2341,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, } /* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState - object, and only when fx_5_0 profile is used. */ + object, and only when fx_4_1 or fx_5_0 profile is used. 
*/ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, unsigned int entry_index, struct fx_write_context *fx) { @@ -2140,7 +2355,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * if (type->class != HLSL_CLASS_BLEND_STATE) return 1; - if (ctx->profile->major_version != 5) + if (hlsl_version_lt(ctx, 4, 1)) return 1; if (entry->lhs_has_index) return 1; @@ -2164,7 +2379,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * for (i = 1; i < array_size; ++i) { block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, - entry->name, true, i, 0); + entry->name, true, i, true, 0); } return array_size; @@ -2401,6 +2616,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx size = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { + if (!is_numeric_fx_4_type(var->data_type)) + continue; + if (var->buffer != b) continue; @@ -2629,3 +2847,506 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_unreachable(); } } + +struct fx_parser +{ + const uint8_t *ptr, *start, *end; + struct vkd3d_shader_message_context *message_context; + struct vkd3d_string_buffer buffer; + unsigned int indent; + struct + { + const uint8_t *ptr; + const uint8_t *end; + uint32_t size; + } unstructured; + uint32_t buffer_count; + uint32_t object_count; + bool failed; +}; + +static uint32_t fx_parser_read_u32(struct fx_parser *parser) +{ + uint32_t ret; + + if ((parser->end - parser->ptr) < sizeof(uint32_t)) + { + parser->failed = true; + return 0; + } + + ret = *(uint32_t *)parser->ptr; + parser->ptr += sizeof(uint32_t); + + return ret; +} + +static void fx_parser_read_u32s(struct fx_parser *parser, void *dst, size_t size) +{ + uint32_t *ptr = dst; + size_t i; + + for (i = 0; i < size / sizeof(uint32_t); ++i) + ptr[i] = fx_parser_read_u32(parser); +} + +static void 
fx_parser_skip(struct fx_parser *parser, size_t size) +{ + if ((parser->end - parser->ptr) < size) + { + parser->ptr = parser->end; + parser->failed = true; + return; + } + parser->ptr += size; +} + +static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, enum vkd3d_shader_error error, + const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(parser->message_context, NULL, error, format, args); + va_end(args); + + parser->failed = true; +} + +static int fx_2_parse(struct fx_parser *parser) +{ + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); + + return -1; +} + +static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) +{ + const uint8_t *ptr = parser->unstructured.ptr; + + memset(dst, 0, size); + if (offset >= parser->unstructured.size + || size > parser->unstructured.size - offset) + { + parser->failed = true; + return; + } + + ptr += offset; + memcpy(dst, ptr, size); +} + +static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) +{ + const uint8_t *ptr = parser->unstructured.ptr; + const uint8_t *end = parser->unstructured.end; + + if (offset >= parser->unstructured.size) + { + parser->failed = true; + return ""; + } + + ptr += offset; + + while (ptr < end && *ptr) + ++ptr; + + if (*ptr) + { + parser->failed = true; + return ""; + } + + return (const char *)(parser->unstructured.ptr + offset); +} + +static void parse_fx_start_indent(struct fx_parser *parser) +{ + ++parser->indent; +} + +static void parse_fx_end_indent(struct fx_parser *parser) +{ + --parser->indent; +} + +static void parse_fx_print_indent(struct fx_parser *parser) +{ + vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); +} + +static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, + const struct fx_4_binary_type *type) +{ + unsigned int base_type, 
comp_count; + size_t i; + + base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; + + comp_count = type->packed_size / sizeof(uint32_t); + for (i = 0; i < comp_count; ++i) + { + union hlsl_constant_value_component value; + + fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); + + if (base_type == FX_4_NUMERIC_TYPE_FLOAT) + vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); + else if (base_type == FX_4_NUMERIC_TYPE_INT) + vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); + else if (base_type == FX_4_NUMERIC_TYPE_UINT) + vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); + else if (base_type == FX_4_NUMERIC_TYPE_BOOL) + vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" ); + else + vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); + + if (i < comp_count - 1) + vkd3d_string_buffer_printf(&parser->buffer, ", "); + + offset += sizeof(uint32_t); + } +} + +static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) +{ + const char *str = fx_4_get_string(parser, offset); + vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); +} + +static void fx_parse_fx_4_annotations(struct fx_parser *parser) +{ + struct fx_4_annotation + { + uint32_t name; + uint32_t type; + } var; + struct fx_4_binary_type type; + const char *name, *type_name; + uint32_t count, i, value; + + count = fx_parser_read_u32(parser); + + if (!count) + return; + + vkd3d_string_buffer_printf(&parser->buffer, "\n"); + parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "<\n"); + parse_fx_start_indent(parser); + + for (i = 0; i < count; ++i) + { + fx_parser_read_u32s(parser, &var, sizeof(var)); + fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); + + name = fx_4_get_string(parser, var.name); + type_name = fx_4_get_string(parser, type.name); + + parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, 
name); + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); + vkd3d_string_buffer_printf(&parser->buffer, " = "); + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "{ "); + + if (type.class == FX_4_TYPE_CLASS_NUMERIC) + { + value = fx_parser_read_u32(parser); + parse_fx_4_numeric_value(parser, value, &type); + } + else if (type.class == FX_4_TYPE_CLASS_OBJECT && type.typeinfo == FX_4_OBJECT_TYPE_STRING) + { + uint32_t element_count = max(type.element_count, 1); + + for (uint32_t j = 0; j < element_count; ++j) + { + value = fx_parser_read_u32(parser); + fx_4_parse_string_initializer(parser, value); + if (j < element_count - 1) + vkd3d_string_buffer_printf(&parser->buffer, ", "); + } + } + else + { + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, + "Only numeric and string types are supported in annotations.\n"); + } + + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, " }"); + vkd3d_string_buffer_printf(&parser->buffer, ";\n"); + } + parse_fx_end_indent(parser); + + parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, ">"); +} + + +static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) +{ + struct fx_4_numeric_variable + { + uint32_t name; + uint32_t type; + uint32_t semantic; + uint32_t offset; + uint32_t value; + uint32_t flags; + } var; + const char *name, *semantic, *type_name; + struct fx_4_binary_type type; + uint32_t i; + + for (i = 0; i < count; ++i) + { + fx_parser_read_u32s(parser, &var, sizeof(var)); + fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); + + name = fx_4_get_string(parser, var.name); + type_name = fx_4_get_string(parser, type.name); + + vkd3d_string_buffer_printf(&parser->buffer, " %s %s", type_name, name); + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); + + if (var.semantic) + { + semantic = fx_4_get_string(parser, 
var.semantic); + vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic); + } + fx_parse_fx_4_annotations(parser); + + if (var.value) + { + vkd3d_string_buffer_printf(&parser->buffer, " = { "); + parse_fx_4_numeric_value(parser, var.value, &type); + vkd3d_string_buffer_printf(&parser->buffer, " }"); + } + vkd3d_string_buffer_printf(&parser->buffer, "; // Offset: %u, size %u.\n", var.offset, type.unpacked_size); + } +} + +static void fx_parse_buffers(struct fx_parser *parser) +{ + struct fx_buffer + { + uint32_t name; + uint32_t size; + uint32_t flags; + uint32_t count; + uint32_t bind_point; + } buffer; + const char *name; + uint32_t i; + + if (parser->failed) + return; + + for (i = 0; i < parser->buffer_count; ++i) + { + fx_parser_read_u32s(parser, &buffer, sizeof(buffer)); + + name = fx_4_get_string(parser, buffer.name); + + vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s", name); + fx_parse_fx_4_annotations(parser); + + vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); + parse_fx_start_indent(parser); + fx_parse_fx_4_numeric_variables(parser, buffer.count); + parse_fx_end_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); + } +} + +static void fx_4_parse_objects(struct fx_parser *parser) +{ + struct fx_4_object_variable + { + uint32_t name; + uint32_t type; + uint32_t semantic; + uint32_t bind_point; + } var; + uint32_t i, j, value, element_count; + struct fx_4_binary_type type; + const char *name, *type_name; + + if (parser->failed) + return; + + for (i = 0; i < parser->object_count; ++i) + { + fx_parser_read_u32s(parser, &var, sizeof(var)); + fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); + + name = fx_4_get_string(parser, var.name); + type_name = fx_4_get_string(parser, type.name); + vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); + vkd3d_string_buffer_printf(&parser->buffer, " 
= {\n"); + + element_count = max(type.element_count, 1); + for (j = 0; j < element_count; ++j) + { + switch (type.typeinfo) + { + case FX_4_OBJECT_TYPE_STRING: + vkd3d_string_buffer_printf(&parser->buffer, " "); + value = fx_parser_read_u32(parser); + fx_4_parse_string_initializer(parser, value); + break; + default: + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, + "Parsing object type %u is not implemented.\n", type.typeinfo); + return; + } + vkd3d_string_buffer_printf(&parser->buffer, ",\n"); + } + vkd3d_string_buffer_printf(&parser->buffer, "};\n"); + } +} + +static int fx_4_parse(struct fx_parser *parser) +{ + struct fx_4_header + { + uint32_t version; + uint32_t buffer_count; + uint32_t numeric_variable_count; + uint32_t object_count; + uint32_t shared_buffer_count; + uint32_t shared_numeric_variable_count; + uint32_t shared_object_count; + uint32_t technique_count; + uint32_t unstructured_size; + uint32_t string_count; + uint32_t texture_count; + uint32_t depth_stencil_state_count; + uint32_t blend_state_count; + uint32_t rasterizer_state_count; + uint32_t sampler_state_count; + uint32_t rtv_count; + uint32_t dsv_count; + uint32_t shader_count; + uint32_t inline_shader_count; + } header; + + fx_parser_read_u32s(parser, &header, sizeof(header)); + parser->buffer_count = header.buffer_count; + parser->object_count = header.object_count; + + if (parser->end - parser->ptr < header.unstructured_size) + { + parser->failed = true; + return -1; + } + + parser->unstructured.ptr = parser->ptr; + parser->unstructured.end = parser->ptr + header.unstructured_size; + parser->unstructured.size = header.unstructured_size; + fx_parser_skip(parser, header.unstructured_size); + + fx_parse_buffers(parser); + fx_4_parse_objects(parser); + + return parser->failed ? 
- 1 : 0; +} + +static int fx_5_parse(struct fx_parser *parser) +{ + struct fx_5_header + { + uint32_t version; + uint32_t buffer_count; + uint32_t numeric_variable_count; + uint32_t object_count; + uint32_t shared_buffer_count; + uint32_t shared_numeric_variable_count; + uint32_t shared_object_count; + uint32_t technique_count; + uint32_t unstructured_size; + uint32_t string_count; + uint32_t texture_count; + uint32_t depth_stencil_state_count; + uint32_t blend_state_count; + uint32_t rasterizer_state_count; + uint32_t sampler_state_count; + uint32_t rtv_count; + uint32_t dsv_count; + uint32_t shader_count; + uint32_t inline_shader_count; + uint32_t group_count; + uint32_t uav_count; + uint32_t interface_variable_count; + uint32_t interface_variable_element_count; + uint32_t class_instance_element_count; + } header; + + fx_parser_read_u32s(parser, &header, sizeof(header)); + parser->buffer_count = header.buffer_count; + parser->object_count = header.object_count; + + if (parser->end - parser->ptr < header.unstructured_size) + { + parser->failed = true; + return -1; + } + + parser->unstructured.ptr = parser->ptr; + parser->unstructured.end = parser->ptr + header.unstructured_size; + parser->unstructured.size = header.unstructured_size; + fx_parser_skip(parser, header.unstructured_size); + + fx_parse_buffers(parser); + fx_4_parse_objects(parser); + + return parser->failed ? 
- 1 : 0; +} + +int fx_parse(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct fx_parser parser = + { + .start = compile_info->source.code, + .ptr = compile_info->source.code, + .end = (uint8_t *)compile_info->source.code + compile_info->source.size, + .message_context = message_context, + }; + uint32_t version; + int ret; + + vkd3d_string_buffer_init(&parser.buffer); + + if (parser.end - parser.start < sizeof(version)) + return -1; + version = *(uint32_t *)parser.ptr; + + switch (version) + { + case 0xfeff0901: + ret = fx_2_parse(&parser); + break; + case 0xfeff1001: + case 0xfeff1011: + ret = fx_4_parse(&parser); + break; + case 0xfeff2001: + ret = fx_5_parse(&parser); + break; + default: + fx_parser_error(&parser, VKD3D_SHADER_ERROR_FX_INVALID_VERSION, + "Invalid effect binary version value 0x%08x.", version); + ret = -1; + } + + vkd3d_shader_code_from_string_buffer(out, &parser.buffer); + + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index d1f02ab568b..46515818d07 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -18,6 +18,13 @@ #include "vkd3d_shader_private.h" +struct glsl_resource_type_info +{ + size_t coord_size; + bool shadow; + const char *type_suffix; +}; + struct glsl_src { struct vkd3d_string_buffer *str; @@ -38,9 +45,26 @@ struct vkd3d_glsl_generator struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; unsigned int indent; + const char *prefix; bool failed; + + struct shader_limits + { + unsigned int input_count; + unsigned int output_count; + } limits; + bool interstage_input; + bool interstage_output; + + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; + const struct 
vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; }; +static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_src_param *rel_addr, unsigned int offset); + static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( struct vkd3d_glsl_generator *generator, enum vkd3d_shader_error error, const char *fmt, ...) @@ -53,11 +77,110 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( generator->failed = true; } +static const char *shader_glsl_get_prefix(enum vkd3d_shader_type type) +{ + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + return "vs"; + case VKD3D_SHADER_TYPE_HULL: + return "hs"; + case VKD3D_SHADER_TYPE_DOMAIN: + return "ds"; + case VKD3D_SHADER_TYPE_GEOMETRY: + return "gs"; + case VKD3D_SHADER_TYPE_PIXEL: + return "ps"; + case VKD3D_SHADER_TYPE_COMPUTE: + return "cs"; + default: + return NULL; + } +} + +static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info(enum vkd3d_shader_resource_type t) +{ + static const struct glsl_resource_type_info info[] = + { + {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ + {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ + {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ + {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ + {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ + {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ + {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ + {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ + {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ + {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ + {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ + }; + + if (!t || t >= ARRAY_SIZE(info)) + return NULL; + + return &info[t]; +} + +static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor(struct vkd3d_glsl_generator *gen, + enum 
vkd3d_shader_descriptor_type type, unsigned int idx, unsigned int space) +{ + const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; + + for (unsigned int i = 0; i < info->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info1 *d = &info->descriptors[i]; + + if (d->type == type && d->register_space == space && d->register_index == idx) + return d; + } + + return NULL; +} + +static const struct vkd3d_shader_descriptor_info1 *shader_glsl_get_descriptor_by_id( + struct vkd3d_glsl_generator *gen, enum vkd3d_shader_descriptor_type type, unsigned int id) +{ + const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; + + for (unsigned int i = 0; i < info->descriptor_count; ++i) + { + const struct vkd3d_shader_descriptor_info1 *d = &info->descriptors[i]; + + if (d->type == type && d->register_id == id) + return d; + } + + return NULL; +} + static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) { vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); } +static void shader_glsl_print_combined_sampler_name(struct vkd3d_string_buffer *buffer, + struct vkd3d_glsl_generator *gen, unsigned int resource_index, + unsigned int resource_space, unsigned int sampler_index, unsigned int sampler_space) +{ + vkd3d_string_buffer_printf(buffer, "%s_t_%u", gen->prefix, resource_index); + if (resource_space) + vkd3d_string_buffer_printf(buffer, "_%u", resource_space); + if (sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX) + { + vkd3d_string_buffer_printf(buffer, "_s_%u", sampler_index); + if (sampler_space) + vkd3d_string_buffer_printf(buffer, "_%u", sampler_space); + } +} + +static void shader_glsl_print_image_name(struct vkd3d_string_buffer *buffer, + struct vkd3d_glsl_generator *gen, unsigned int idx, unsigned int space) +{ + vkd3d_string_buffer_printf(buffer, "%s_image_%u", gen->prefix, idx); + if (space) + vkd3d_string_buffer_printf(buffer, "_%u", space); +} + static void 
shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) { @@ -67,6 +190,95 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); break; + case VKD3DSPR_INPUT: + if (reg->idx_count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled input register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + break; + } + if (reg->idx[0].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled input register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, reg->idx[0].offset); + break; + + case VKD3DSPR_OUTPUT: + if (reg->idx_count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + break; + } + if (reg->idx[0].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "%s_out[%u]", gen->prefix, reg->idx[0].offset); + break; + + case VKD3DSPR_DEPTHOUT: + if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled depth output in shader type #%x.", + gen->program->shader_version.type); + vkd3d_string_buffer_printf(buffer, "gl_FragDepth"); + break; + + case VKD3DSPR_IMMCONST: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + 
vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + + case VSIR_DIMENSION_VEC4: + vkd3d_string_buffer_printf(buffer, "uvec4(%#xu, %#xu, %#xu, %#xu)", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + + default: + vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + break; + } + break; + + case VKD3DSPR_CONSTBUFFER: + if (reg->idx_count != 3) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + break; + } + if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "<unhandled register %#x>", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]", + gen->prefix, reg->idx[0].offset, reg->idx[2].offset); + break; + + case VKD3DSPR_IDXTEMP: + vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); + shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); + break; + default: vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); @@ -106,23 +318,118 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca vkd3d_string_buffer_release(cache, src->str); } -static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, - const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) +static void shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vkd3d_glsl_generator *gen, + const char *src, enum vkd3d_data_type 
dst_data_type, enum vkd3d_data_type src_data_type, unsigned int size) +{ + if (dst_data_type == VKD3D_DATA_UNORM || dst_data_type == VKD3D_DATA_SNORM) + dst_data_type = VKD3D_DATA_FLOAT; + if (src_data_type == VKD3D_DATA_UNORM || src_data_type == VKD3D_DATA_SNORM) + src_data_type = VKD3D_DATA_FLOAT; + + if (dst_data_type == src_data_type) + { + vkd3d_string_buffer_printf(dst, "%s", src); + return; + } + + if (src_data_type == VKD3D_DATA_FLOAT) + { + switch (dst_data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(dst, "floatBitsToInt(%s)", src); + return; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(dst, "floatBitsToUint(%s)", src); + return; + default: + break; + } + } + + if (src_data_type == VKD3D_DATA_UINT) + { + switch (dst_data_type) + { + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(dst, "uintBitsToFloat(%s)", src); + return; + case VKD3D_DATA_INT: + if (size == 1) + vkd3d_string_buffer_printf(dst, "int(%s)", src); + else + vkd3d_string_buffer_printf(dst, "ivec%u(%s)", size, src); + return; + default: + break; + } + } + + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled bitcast from %#x to %#x.", + src_data_type, dst_data_type); + vkd3d_string_buffer_printf(dst, "%s", src); +} + +static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vkd3d_data_type data_type) { const struct vkd3d_shader_register *reg = &vsir_src->reg; + struct vkd3d_string_buffer *register_name, *str; + enum vkd3d_data_type src_data_type; + unsigned int size; - glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + register_name = vkd3d_string_buffer_get(&gen->string_buffers); if (reg->non_uniform) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled 'non-uniform' modifier."); - if (vsir_src->modifiers) - vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - shader_glsl_print_register_name(glsl_src->str, gen, reg); + if (reg->type == VKD3DSPR_IMMCONST) + src_data_type = VKD3D_DATA_UINT; + else + src_data_type = VKD3D_DATA_FLOAT; + + shader_glsl_print_register_name(register_name, gen, reg); + + if (!vsir_src->modifiers) + str = buffer; + else + str = vkd3d_string_buffer_get(&gen->string_buffers); + + size = reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1; + shader_glsl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, size); if (reg->dimension == VSIR_DIMENSION_VEC4) - shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); + shader_glsl_print_swizzle(str, vsir_src->swizzle, mask); + + switch (vsir_src->modifiers) + { + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_NEG: + vkd3d_string_buffer_printf(buffer, "-%s", str->buffer); + break; + case VKD3DSPSM_ABS: + vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer); + break; + default: + vkd3d_string_buffer_printf(buffer, "<unhandled modifier %#x>(%s)", + vsir_src->modifiers, str->buffer); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + break; + } + + if (str != buffer) + vkd3d_string_buffer_release(&gen->string_buffers, str); + vkd3d_string_buffer_release(&gen->string_buffers, register_name); +} + +static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) +{ + glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + shader_glsl_print_src(glsl_src->str, gen, vsir_src, mask, vsir_src->reg.data_type); } static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) @@ -153,26 +460,89 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener return write_mask; } -static void 
VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( - struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) +static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_src_param *rel_addr, unsigned int offset) { - va_list args; + struct glsl_src r; + + if (!rel_addr) + { + vkd3d_string_buffer_printf(buffer, "[%u]", offset); + return; + } + + glsl_src_init(&r, gen, rel_addr, VKD3DSP_WRITEMASK_0); + vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer); + if (offset) + vkd3d_string_buffer_printf(buffer, " + %u", offset); + /* Always close the subscript, whether or not a constant offset was appended. */ + vkd3d_string_buffer_printf(buffer, "]"); + glsl_src_cleanup(&r, &gen->string_buffers); +} + +static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_glsl_generator *gen, + struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, va_list args) +{ + struct vkd3d_string_buffer *buffer = gen->buffer; + uint32_t modifiers = dst->vsir->modifiers; + bool close = true; if (dst->vsir->shift) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); - if (dst->vsir->modifiers) + if (modifiers & ~VKD3DSPDM_SATURATE) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); + "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers); - shader_glsl_print_indent(gen->buffer, gen->indent); - vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + shader_glsl_print_indent(buffer, gen->indent); + vkd3d_string_buffer_printf(buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + if (modifiers & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(buffer, "clamp("); + + switch (data_type) + { + default: + vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled destination register data type %#x.", data_type); + /* fall through */ + case VKD3D_DATA_FLOAT: + close = false; + break; + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "intBitsToFloat("); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "uintBitsToFloat("); + break; + } + + vkd3d_string_buffer_vprintf(buffer, format, args); + + if (close) + vkd3d_string_buffer_printf(buffer, ")"); + if (modifiers & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(buffer, ", 0.0, 1.0)"); + vkd3d_string_buffer_printf(buffer, ";\n"); +} + +static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( + struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) +{ + va_list args; va_start(args, format); - vkd3d_string_buffer_vprintf(gen->buffer, format, args); + shader_glsl_vprint_assignment(gen, dst, dst->vsir->reg.data_type, format, args); va_end(args); +} + +static void VKD3D_PRINTF_FUNC(4, 5) shader_glsl_print_assignment_ext(struct vkd3d_glsl_generator *gen, + struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, ...) 
+{ + va_list args; - vkd3d_string_buffer_printf(gen->buffer, ";\n"); + va_start(args, format); + shader_glsl_vprint_assignment(gen, dst, data_type, format, args); + va_end(args); } static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) @@ -183,138 +553,1671 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } -static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +static void shader_glsl_binop(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) { - struct glsl_src src; + struct glsl_src src[2]; struct glsl_dst dst; uint32_t mask; mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); - glsl_src_init(&src, gen, &ins->src[0], mask); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); - shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); + shader_glsl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); - glsl_src_cleanup(&src, &gen->string_buffers); + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); glsl_dst_cleanup(&dst, &gen->string_buffers); } -static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +static void shader_glsl_dot(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, uint32_t src_mask) { - const struct vkd3d_shader_version *version = &gen->program->shader_version; + unsigned int component_count; + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t dst_mask; - /* - * TODO: Implement in_subroutine - * TODO: shader_glsl_generate_shader_epilogue(generator); - */ - if (version->major >= 4) - { - shader_glsl_print_indent(gen->buffer, gen->indent); - 
vkd3d_string_buffer_printf(gen->buffer, "return;\n"); - } + dst_mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], src_mask); + glsl_src_init(&src[1], gen, &ins->src[1], src_mask); + + if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1) + shader_glsl_print_assignment(gen, &dst, "vec%d(dot(%s, %s))", + component_count, src[0].str->buffer, src[1].str->buffer); + else + shader_glsl_print_assignment(gen, &dst, "dot(%s, %s)", + src[0].str->buffer, src[1].str->buffer); + + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); } -static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - const struct vkd3d_shader_instruction *ins) +static void shader_glsl_intrinsic(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) { - gen->location = ins->location; + struct vkd3d_string_buffer *args; + struct glsl_src src; + struct glsl_dst dst; + unsigned int i; + uint32_t mask; - switch (ins->opcode) + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + args = vkd3d_string_buffer_get(&gen->string_buffers); + + for (i = 0; i < ins->src_count; ++i) { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; - case VKD3DSIH_MOV: - shader_glsl_mov(gen, ins); - break; - case VKD3DSIH_RET: - shader_glsl_ret(gen, ins); - break; - default: - shader_glsl_unhandled(gen, ins); - break; + glsl_src_init(&src, gen, &ins->src[i], mask); + vkd3d_string_buffer_printf(args, "%s%s", i ? 
", " : "", src.str->buffer); + glsl_src_cleanup(&src, &gen->string_buffers); } + shader_glsl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, args); + glsl_dst_cleanup(&dst, &gen->string_buffers); } -static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) +static void shader_glsl_relop(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *scalar_op, const char *vector_op) { - const struct vsir_program *program = gen->program; - struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int mask_size; + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t mask; - if (program->temp_count) - vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + shader_glsl_print_assignment(gen, &dst, "uvec%u(%s(%s, %s)) * 0xffffffffu", + mask_size, vector_op, src[0].str->buffer, src[1].str->buffer); + else + shader_glsl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", + src[0].str->buffer, scalar_op, src[1].str->buffer); + + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); } -static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) +static void shader_glsl_cast(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins, + const char *scalar_constructor, const char *vector_constructor) { - const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; - struct vkd3d_string_buffer *buffer = gen->buffer; - unsigned int i; - void *code; - - MESSAGE("Generating a GLSL shader. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); + unsigned int component_count; + struct glsl_src src; + struct glsl_dst dst; + uint32_t mask; - vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src, gen, &ins->src[0], mask); - vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + shader_glsl_print_assignment(gen, &dst, "%s%u(%s)", + vector_constructor, component_count, src.str->buffer); + else + shader_glsl_print_assignment(gen, &dst, "%s(%s)", + scalar_constructor, src.str->buffer); - shader_glsl_generate_declarations(gen); + glsl_src_cleanup(&src, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} - vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); +static void shader_glsl_end_block(struct vkd3d_glsl_generator *gen) +{ + --gen->indent; + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "}\n"); +} +static void shader_glsl_begin_block(struct vkd3d_glsl_generator *gen) +{ + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "{\n"); ++gen->indent; - for (i = 0; i < instructions->count; ++i) - { - vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); - } +} - vkd3d_string_buffer_printf(buffer, "}\n"); +static void shader_glsl_if(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const char *condition; + struct glsl_src src; - if (TRACE_ON()) - vkd3d_string_buffer_trace(buffer); + glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); - if (gen->failed) - return VKD3D_ERROR_INVALID_SHADER; + shader_glsl_print_indent(gen->buffer, gen->indent); + condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? 
"bool" : "!bool"; + vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); - if ((code = vkd3d_malloc(buffer->buffer_size))) - { - memcpy(code, buffer->buffer, buffer->content_size); - out->size = buffer->content_size; - out->code = code; - } - else return VKD3D_ERROR_OUT_OF_MEMORY; + glsl_src_cleanup(&src, &gen->string_buffers); - return VKD3D_OK; + shader_glsl_begin_block(gen); } -static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) +static void shader_glsl_else(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { - vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); - vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); + shader_glsl_end_block(gen); + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "else\n"); + shader_glsl_begin_block(gen); } -static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - struct vsir_program *program, struct vkd3d_shader_message_context *message_context) +static void shader_glsl_loop(struct vkd3d_glsl_generator *gen) { - memset(gen, 0, sizeof(*gen)); - gen->program = program; - vkd3d_string_buffer_cache_init(&gen->string_buffers); - gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); - gen->message_context = message_context; + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "for (;;)\n"); + shader_glsl_begin_block(gen); } -int glsl_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context) +static void shader_glsl_break(struct vkd3d_glsl_generator *gen) { - struct vkd3d_glsl_generator generator; - int ret; + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "break;\n"); +} - if ((ret = vsir_program_normalise(program, config_flags, 
compile_info, message_context)) < 0) - return ret; +static void shader_glsl_continue(struct vkd3d_glsl_generator *gen) +{ + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "continue;\n"); +} + +static void shader_glsl_switch(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct glsl_src src; + + glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); + + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "switch (%s)\n", src.str->buffer); + shader_glsl_begin_block(gen); + + glsl_src_cleanup(&src, &gen->string_buffers); +} + +static void shader_glsl_case(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct glsl_src src; + + glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); + + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "case %s:\n", src.str->buffer); + + glsl_src_cleanup(&src, &gen->string_buffers); +} + +static void shader_glsl_default(struct vkd3d_glsl_generator *gen) +{ + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "default:\n"); +} + +static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const struct glsl_resource_type_info *resource_type_info; + unsigned int resource_id, resource_idx, resource_space; + const struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_component_type sampled_type; + enum vkd3d_shader_resource_type resource_type; + struct vkd3d_string_buffer *fetch; + enum vkd3d_data_type data_type; + struct glsl_src coord, lod; + struct glsl_dst dst; + uint32_t coord_mask; + + if (vkd3d_shader_instruction_has_texel_offset(ins)) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled texel fetch offset."); + + if (ins->src[1].reg.idx[0].rel_addr || 
ins->src[1].reg.idx[1].rel_addr) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Descriptor indexing is not supported."); + + resource_id = ins->src[1].reg.idx[0].offset; + resource_idx = ins->src[1].reg.idx[1].offset; + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) + { + resource_type = d->resource_type; + resource_space = d->register_space; + sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); + data_type = vkd3d_data_type_from_component_type(sampled_type); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Undeclared resource descriptor %u.", resource_id); + resource_space = 0; + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + data_type = VKD3D_DATA_FLOAT; + } + + if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) + { + coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x.", resource_type); + coord_mask = vkd3d_write_mask_from_component_count(2); + } + + glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&coord, gen, &ins->src[0], coord_mask); + glsl_src_init(&lod, gen, &ins->src[0], VKD3DSP_WRITEMASK_3); + fetch = vkd3d_string_buffer_get(&gen->string_buffers); + + vkd3d_string_buffer_printf(fetch, "texelFetch("); + shader_glsl_print_combined_sampler_name(fetch, gen, resource_idx, + resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0); + vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer); + if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER) + vkd3d_string_buffer_printf(fetch, ", %s", lod.str->buffer); + vkd3d_string_buffer_printf(fetch, ")"); + shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask); + + shader_glsl_print_assignment_ext(gen, &dst, data_type, 
"%s", fetch->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, fetch); + glsl_src_cleanup(&lod, &gen->string_buffers); + glsl_src_cleanup(&coord, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const struct glsl_resource_type_info *resource_type_info; + unsigned int resource_id, resource_idx, resource_space; + unsigned int sampler_id, sampler_idx, sampler_space; + const struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_component_type sampled_type; + enum vkd3d_shader_resource_type resource_type; + struct vkd3d_string_buffer *sample; + enum vkd3d_data_type data_type; + struct glsl_src coord; + struct glsl_dst dst; + uint32_t coord_mask; + + if (vkd3d_shader_instruction_has_texel_offset(ins)) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled texel sample offset."); + + if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr + || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Descriptor indexing is not supported."); + + resource_id = ins->src[1].reg.idx[0].offset; + resource_idx = ins->src[1].reg.idx[1].offset; + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) + { + resource_type = d->resource_type; + resource_space = d->register_space; + sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); + data_type = vkd3d_data_type_from_component_type(sampled_type); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Undeclared resource descriptor %u.", resource_id); + resource_space = 0; + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + data_type = VKD3D_DATA_FLOAT; + } + + if ((resource_type_info = 
shader_glsl_get_resource_type_info(resource_type))) + { + coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x.", resource_type); + coord_mask = vkd3d_write_mask_from_component_count(2); + } + + sampler_id = ins->src[2].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[1].offset; + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) + { + sampler_space = d->register_space; + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Undeclared sampler descriptor %u.", sampler_id); + sampler_space = 0; + } + + glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&coord, gen, &ins->src[0], coord_mask); + sample = vkd3d_string_buffer_get(&gen->string_buffers); + + vkd3d_string_buffer_printf(sample, "texture("); + shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); + vkd3d_string_buffer_printf(sample, ", %s)", coord.str->buffer); + shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); + + shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, sample); + glsl_src_cleanup(&coord, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_store_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const struct glsl_resource_type_info *resource_type_info; + enum vkd3d_shader_component_type component_type; + const struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_resource_type resource_type; + unsigned int uav_id, uav_idx, uav_space; + struct vkd3d_string_buffer *image_data; + struct glsl_src image_coord; + uint32_t coord_mask; + + if 
(ins->dst[0].reg.idx[0].rel_addr || ins->dst[0].reg.idx[1].rel_addr) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Descriptor indexing is not supported."); + + uav_id = ins->dst[0].reg.idx[0].offset; + uav_idx = ins->dst[0].reg.idx[1].offset; + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) + { + resource_type = d->resource_type; + uav_space = d->register_space; + component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); + uav_space = 0; + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + } + + if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) + { + coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled UAV type %#x.", resource_type); + coord_mask = vkd3d_write_mask_from_component_count(2); + } + + glsl_src_init(&image_coord, gen, &ins->src[0], coord_mask); + image_data = vkd3d_string_buffer_get(&gen->string_buffers); + + if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) + { + switch (component_type) + { + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(image_data, "uvec4("); + break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(image_data, "ivec4("); + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x.", component_type); + /* fall through */ + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(image_data, "vec4("); + break; + } + } + shader_glsl_print_src(image_data, gen, &ins->src[1], VKD3DSP_WRITEMASK_ALL, + 
vkd3d_data_type_from_component_type(component_type)); + if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) + vkd3d_string_buffer_printf(image_data, ", 0, 0, 0)"); + + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "imageStore("); + shader_glsl_print_image_name(gen->buffer, gen, uav_idx, uav_space); + vkd3d_string_buffer_printf(gen->buffer, ", %s, %s);\n", image_coord.str->buffer, image_data->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, image_data); + glsl_src_cleanup(&image_coord, &gen->string_buffers); +} + +static void shader_glsl_unary_op(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct glsl_src src; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src, gen, &ins->src[0], mask); + + shader_glsl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer); + + glsl_src_cleanup(&src, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct glsl_src src; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src, gen, &ins->src[0], mask); + + shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); + + glsl_src_cleanup(&src, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_movc(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + unsigned int component_count; + struct glsl_src src[3]; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); + glsl_src_init(&src[2], gen, &ins->src[2], mask); + + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + 
shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bvec%u(%s))", + src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); + else + shader_glsl_print_assignment(gen, &dst, "mix(%s, %s, bool(%s))", + src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); + + glsl_src_cleanup(&src[2], &gen->string_buffers); + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_mul_extended(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t mask; + + if (ins->dst[0].reg.type != VKD3DSPR_NULL) + { + /* FIXME: imulExtended()/umulExtended() from ARB_gpu_shader5/GLSL 4.00+. */ + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + shader_glsl_print_assignment(gen, &dst, ""); + glsl_dst_cleanup(&dst, &gen->string_buffers); + + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled 64-bit integer multiplication."); + } + + if (ins->dst[1].reg.type != VKD3DSPR_NULL) + { + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[1]); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); + + shader_glsl_print_assignment(gen, &dst, "%s * %s", src[0].str->buffer, src[1].str->buffer); + + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); + } +} + +static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_sysval_semantic sysval, unsigned int idx) +{ + const struct vkd3d_shader_version *version = &gen->program->shader_version; + + switch (sysval) + { + case VKD3D_SHADER_SV_POSITION: + if (version->type == VKD3D_SHADER_TYPE_COMPUTE) + { + vkd3d_string_buffer_printf(buffer, "", sysval); + 
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_POSITION in shader type #%x.", version->type); + break; + } + if (idx) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_POSITION index %u.", idx); + if (version->type == VKD3D_SHADER_TYPE_PIXEL) + vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); + else + vkd3d_string_buffer_printf(buffer, "gl_Position"); + break; + + case VKD3D_SHADER_SV_VERTEX_ID: + if (version->type != VKD3D_SHADER_TYPE_VERTEX) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_VERTEX_ID in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_VertexID, 0, 0, 0))"); + break; + + case VKD3D_SHADER_SV_IS_FRONT_FACE: + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, + "uintBitsToFloat(uvec4(gl_FrontFacing ? 
0xffffffffu : 0u, 0u, 0u, 0u))"); + + break; + + case VKD3D_SHADER_SV_TARGET: + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_TARGET in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, "shader_out_%u", idx); + break; + + default: + vkd3d_string_buffer_printf(buffer, "", sysval); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", sysval); + break; + } +} + +static void shader_glsl_shader_prologue(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->input_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + shader_glsl_print_indent(buffer, gen->indent); + vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, e->register_index); + shader_glsl_print_write_mask(buffer, e->mask); + if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + if (gen->interstage_input) + { + vkd3d_string_buffer_printf(buffer, " = shader_in.reg_%u", e->target_location); + if (e->target_location >= gen->limits.input_count) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Input element %u specifies target location %u, " + "but only %u inputs are supported.", + i, e->target_location, gen->limits.input_count); + } + else + { + switch (e->component_type) + { + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, " = uintBitsToFloat(shader_in_%u)", i); + break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(buffer, " = intBitsToFloat(shader_in_%u)", i); + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + 
"Internal compiler error: Unhandled input component type %#x.", e->component_type); + /* fall through */ + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); + break; + } + } + } + else + { + vkd3d_string_buffer_printf(buffer, " = "); + shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); + } + shader_glsl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + +static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + enum vkd3d_shader_component_type type; + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + type = e->component_type; + shader_glsl_print_indent(buffer, gen->indent); + if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + if (gen->interstage_output) + { + type = VKD3D_SHADER_COMPONENT_FLOAT; + vkd3d_string_buffer_printf(buffer, "shader_out.reg_%u", e->target_location); + if (e->target_location >= gen->limits.output_count) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Output element %u specifies target location %u, " + "but only %u outputs are supported.", + i, e->target_location, gen->limits.output_count); + } + else + { + vkd3d_string_buffer_printf(buffer, "", e->target_location); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output."); + } + } + else + { + shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); + } + shader_glsl_print_write_mask(buffer, e->mask); + switch (type) + { + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", 
gen->prefix, e->register_index); + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output component type %#x.", e->component_type); + /* fall through */ + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); + break; + } + shader_glsl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + +static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_version *version = &gen->program->shader_version; + + if (version->major >= 4) + { + shader_glsl_shader_epilogue(gen); + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "return;\n"); + } +} + +static void shader_glsl_dcl_indexable_temp(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins) +{ + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "vec4 x%u[%u];\n", + ins->declaration.indexable_temp.register_idx, + ins->declaration.indexable_temp.register_size); +} + +static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins) +{ + gen->location = ins->location; + + switch (ins->opcode) + { + case VKD3DSIH_ADD: + case VKD3DSIH_IADD: + shader_glsl_binop(gen, ins, "+"); + break; + case VKD3DSIH_AND: + shader_glsl_binop(gen, ins, "&"); + break; + case VKD3DSIH_BREAK: + shader_glsl_break(gen); + break; + case VKD3DSIH_CASE: + shader_glsl_case(gen, ins); + break; + case VKD3DSIH_CONTINUE: + shader_glsl_continue(gen); + break; + case VKD3DSIH_DCL_INDEXABLE_TEMP: + shader_glsl_dcl_indexable_temp(gen, ins); + break; + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_OUTPUT: + case 
VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_NOP: + break; + case VKD3DSIH_DEFAULT: + shader_glsl_default(gen); + break; + case VKD3DSIH_DIV: + shader_glsl_binop(gen, ins, "/"); + break; + case VKD3DSIH_DP2: + shader_glsl_dot(gen, ins, vkd3d_write_mask_from_component_count(2)); + break; + case VKD3DSIH_DP3: + shader_glsl_dot(gen, ins, vkd3d_write_mask_from_component_count(3)); + break; + case VKD3DSIH_DP4: + shader_glsl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); + break; + case VKD3DSIH_ELSE: + shader_glsl_else(gen, ins); + break; + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDSWITCH: + shader_glsl_end_block(gen); + break; + case VKD3DSIH_EQO: + case VKD3DSIH_IEQ: + shader_glsl_relop(gen, ins, "==", "equal"); + break; + case VKD3DSIH_EXP: + shader_glsl_intrinsic(gen, ins, "exp2"); + break; + case VKD3DSIH_FRC: + shader_glsl_intrinsic(gen, ins, "fract"); + break; + case VKD3DSIH_FTOI: + shader_glsl_cast(gen, ins, "int", "ivec"); + break; + case VKD3DSIH_FTOU: + shader_glsl_cast(gen, ins, "uint", "uvec"); + break; + case VKD3DSIH_GEO: + case VKD3DSIH_IGE: + shader_glsl_relop(gen, ins, ">=", "greaterThanEqual"); + break; + case VKD3DSIH_IF: + shader_glsl_if(gen, ins); + break; + case VKD3DSIH_MAD: + shader_glsl_intrinsic(gen, ins, "fma"); + break; + case VKD3DSIH_ILT: + case VKD3DSIH_LTO: + case VKD3DSIH_ULT: + shader_glsl_relop(gen, ins, "<", "lessThan"); + break; + case VKD3DSIH_IMAX: + case VKD3DSIH_MAX: + shader_glsl_intrinsic(gen, ins, "max"); + break; + case VKD3DSIH_MIN: + shader_glsl_intrinsic(gen, ins, "min"); + break; + case VKD3DSIH_IMUL: + shader_glsl_mul_extended(gen, ins); + break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: + shader_glsl_relop(gen, ins, "!=", "notEqual"); + break; + case VKD3DSIH_INEG: + shader_glsl_unary_op(gen, ins, "-"); + break; + case VKD3DSIH_ISHL: + shader_glsl_binop(gen, ins, "<<"); + break; + case VKD3DSIH_ISHR: + case VKD3DSIH_USHR: + shader_glsl_binop(gen, ins, ">>"); + break; + case VKD3DSIH_ITOF: + case 
VKD3DSIH_UTOF: + shader_glsl_cast(gen, ins, "float", "vec"); + break; + case VKD3DSIH_LD: + shader_glsl_ld(gen, ins); + break; + case VKD3DSIH_LOG: + shader_glsl_intrinsic(gen, ins, "log2"); + break; + case VKD3DSIH_LOOP: + shader_glsl_loop(gen); + break; + case VKD3DSIH_MOV: + shader_glsl_mov(gen, ins); + break; + case VKD3DSIH_MOVC: + shader_glsl_movc(gen, ins); + break; + case VKD3DSIH_MUL: + shader_glsl_binop(gen, ins, "*"); + break; + case VKD3DSIH_NOT: + shader_glsl_unary_op(gen, ins, "~"); + break; + case VKD3DSIH_OR: + shader_glsl_binop(gen, ins, "|"); + break; + case VKD3DSIH_RET: + shader_glsl_ret(gen, ins); + break; + case VKD3DSIH_ROUND_NE: + shader_glsl_intrinsic(gen, ins, "roundEven"); + break; + case VKD3DSIH_ROUND_NI: + shader_glsl_intrinsic(gen, ins, "floor"); + break; + case VKD3DSIH_ROUND_PI: + shader_glsl_intrinsic(gen, ins, "ceil"); + break; + case VKD3DSIH_ROUND_Z: + shader_glsl_intrinsic(gen, ins, "trunc"); + break; + case VKD3DSIH_RSQ: + shader_glsl_intrinsic(gen, ins, "inversesqrt"); + break; + case VKD3DSIH_SAMPLE: + shader_glsl_sample(gen, ins); + break; + case VKD3DSIH_SQRT: + shader_glsl_intrinsic(gen, ins, "sqrt"); + break; + case VKD3DSIH_STORE_UAV_TYPED: + shader_glsl_store_uav_typed(gen, ins); + break; + case VKD3DSIH_SWITCH: + shader_glsl_switch(gen, ins); + break; + default: + shader_glsl_unhandled(gen, ins); + break; + } +} + +static bool shader_glsl_check_shader_visibility(const struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_visibility visibility) +{ + enum vkd3d_shader_type t = gen->program->shader_version.type; + + switch (visibility) + { + case VKD3D_SHADER_VISIBILITY_ALL: + return true; + case VKD3D_SHADER_VISIBILITY_VERTEX: + return t == VKD3D_SHADER_TYPE_VERTEX; + case VKD3D_SHADER_VISIBILITY_HULL: + return t == VKD3D_SHADER_TYPE_HULL; + case VKD3D_SHADER_VISIBILITY_DOMAIN: + return t == VKD3D_SHADER_TYPE_DOMAIN; + case VKD3D_SHADER_VISIBILITY_GEOMETRY: + return t == VKD3D_SHADER_TYPE_GEOMETRY; + case 
VKD3D_SHADER_VISIBILITY_PIXEL: + return t == VKD3D_SHADER_TYPE_PIXEL; + case VKD3D_SHADER_VISIBILITY_COMPUTE: + return t == VKD3D_SHADER_TYPE_COMPUTE; + default: + WARN("Invalid shader visibility %#x.\n", visibility); + return false; + } +} + +static bool shader_glsl_get_uav_binding(const struct vkd3d_glsl_generator *gen, unsigned int register_space, + unsigned int register_idx, enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx) +{ + const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; + const struct vkd3d_shader_resource_binding *binding; + enum vkd3d_shader_binding_flag resource_type_flag; + unsigned int i; + + if (!interface_info) + return false; + + resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + + for (i = 0; i < interface_info->binding_count; ++i) + { + binding = &interface_info->bindings[i]; + + if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + continue; + if (binding->register_space != register_space) + continue; + if (binding->register_index != register_idx) + continue; + if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) + continue; + if (!(binding->flags & resource_type_flag)) + continue; + *binding_idx = i; + return true; + } + + return false; +} + +static void shader_glsl_generate_uav_declaration(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *uav) +{ + const struct glsl_resource_type_info *resource_type_info; + const char *image_type_prefix, *image_type, *read_format; + const struct vkd3d_shader_descriptor_binding *binding; + const struct vkd3d_shader_descriptor_offset *offset; + struct vkd3d_string_buffer *buffer = gen->buffer; + enum vkd3d_shader_component_type component_type; + unsigned int binding_idx; + + if (uav->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "UAV %u has unsupported descriptor array 
size %u.", uav->register_id, uav->count); + return; + } + + if (!shader_glsl_get_uav_binding(gen, uav->register_space, + uav->register_index, uav->resource_type, &binding_idx)) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "No descriptor binding specified for UAV %u.", uav->register_id); + return; + } + + binding = &gen->interface_info->bindings[binding_idx].binding; + + if (binding->set != 0) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding set %u specified for UAV %u.", binding->set, uav->register_id); + return; + } + + if (binding->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding count %u specified for UAV %u.", binding->count, uav->register_id); + return; + } + + if (gen->offset_info && gen->offset_info->binding_offsets) + { + offset = &gen->offset_info->binding_offsets[binding_idx]; + if (offset->static_offset || offset->dynamic_offset_index != ~0u) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled descriptor offset specified for UAV %u.", + uav->register_id); + return; + } + } + + if ((resource_type_info = shader_glsl_get_resource_type_info(uav->resource_type))) + { + image_type = resource_type_info->type_suffix; + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled UAV type %#x.", uav->resource_type); + image_type = ""; + } + + switch ((component_type = vkd3d_component_type_from_resource_data_type(uav->resource_data_type))) + { + case VKD3D_SHADER_COMPONENT_UINT: + image_type_prefix = "u"; + read_format = "r32ui"; + break; + case VKD3D_SHADER_COMPONENT_INT: + image_type_prefix = "i"; + read_format = "r32i"; + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x for UAV %u.", + component_type, 
uav->register_id); + /* fall through */ + case VKD3D_SHADER_COMPONENT_FLOAT: + image_type_prefix = ""; + read_format = "r32f"; + break; + } + + vkd3d_string_buffer_printf(buffer, "layout(binding = %u", binding->binding); + if (uav->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ) + vkd3d_string_buffer_printf(buffer, ", %s) ", read_format); + else + vkd3d_string_buffer_printf(buffer, ") writeonly "); + vkd3d_string_buffer_printf(buffer, "uniform %simage%s ", image_type_prefix, image_type); + shader_glsl_print_image_name(buffer, gen, uav->register_index, uav->register_space); + vkd3d_string_buffer_printf(buffer, ";\n"); +} + +static bool shader_glsl_get_cbv_binding(const struct vkd3d_glsl_generator *gen, + unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) +{ + const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; + const struct vkd3d_shader_resource_binding *binding; + unsigned int i; + + if (!interface_info) + return false; + + for (i = 0; i < interface_info->binding_count; ++i) + { + binding = &interface_info->bindings[i]; + + if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) + continue; + if (binding->register_space != register_space) + continue; + if (binding->register_index != register_idx) + continue; + if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) + continue; + if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) + continue; + *binding_idx = i; + return true; + } + + return false; +} + +static void shader_glsl_generate_cbv_declaration(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *cbv) +{ + const struct vkd3d_shader_descriptor_binding *binding; + const struct vkd3d_shader_descriptor_offset *offset; + struct vkd3d_string_buffer *buffer = gen->buffer; + const char *prefix = gen->prefix; + unsigned int binding_idx; + size_t size; + + if (cbv->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + 
"Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); + return; + } + + if (!shader_glsl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "No descriptor binding specified for constant buffer %u.", cbv->register_id); + return; + } + + binding = &gen->interface_info->bindings[binding_idx].binding; + + if (binding->set != 0) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); + return; + } + + if (binding->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); + return; + } + + if (gen->offset_info && gen->offset_info->binding_offsets) + { + offset = &gen->offset_info->binding_offsets[binding_idx]; + if (offset->static_offset || offset->dynamic_offset_index != ~0u) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled descriptor offset specified for constant buffer %u.", + cbv->register_id); + return; + } + } + + size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); + size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); + + vkd3d_string_buffer_printf(buffer, + "layout(std140, binding = %u) uniform block_%s_cb_%u { vec4 %s_cb_%u[%zu]; };\n", + binding->binding, prefix, cbv->register_id, prefix, cbv->register_id, size); +} + +static bool shader_glsl_get_combined_sampler_binding(const struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_combined_resource_sampler_info *crs, + enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx) +{ + const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; + const struct vkd3d_shader_combined_resource_sampler *s; + 
enum vkd3d_shader_binding_flag resource_type_flag; + unsigned int i; + + if (!interface_info) + return false; + + resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER + ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + + for (i = 0; i < interface_info->combined_sampler_count; ++i) + { + s = &interface_info->combined_samplers[i]; + + if (s->resource_space != crs->resource_space) + continue; + if (s->resource_index != crs->resource_index) + continue; + if (crs->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX) + { + if (s->sampler_space != crs->sampler_space) + continue; + if (s->sampler_index != crs->sampler_index) + continue; + } + if (!shader_glsl_check_shader_visibility(gen, s->shader_visibility)) + continue; + if (!(s->flags & resource_type_flag)) + continue; + *binding_idx = i; + return true; + } + + return false; +} + +static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_combined_resource_sampler_info *crs) +{ + const struct vkd3d_shader_descriptor_info1 *sampler, *srv; + const struct glsl_resource_type_info *resource_type_info; + const struct vkd3d_shader_descriptor_binding *binding; + struct vkd3d_string_buffer *buffer = gen->buffer; + enum vkd3d_shader_component_type component_type; + const char *sampler_type, *sampler_type_prefix; + unsigned int binding_idx; + bool shadow = false; + + if (crs->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX) + { + if (!(sampler = shader_glsl_get_descriptor(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, + crs->sampler_index, crs->sampler_space))) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: No descriptor found for sampler %u, space %u.", + crs->sampler_index, crs->sampler_space); + return; + } + shadow = sampler->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + } + + if (!(srv = shader_glsl_get_descriptor(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, + 
crs->resource_index, crs->resource_space))) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: No descriptor found for resource %u, space %u.", + crs->resource_index, crs->resource_space); + return; + } + + if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type))) + { + sampler_type = resource_type_info->type_suffix; + if (shadow && !resource_type_info->shadow) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Comparison samplers are not supported with resource type %#x.", srv->resource_type); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x for combined resource/sampler " + "for resource %u, space %u and sampler %u, space %u.", srv->resource_type, + crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); + sampler_type = ""; + } + + switch ((component_type = vkd3d_component_type_from_resource_data_type(srv->resource_data_type))) + { + case VKD3D_SHADER_COMPONENT_UINT: + sampler_type_prefix = "u"; + break; + case VKD3D_SHADER_COMPONENT_INT: + sampler_type_prefix = "i"; + break; + case VKD3D_SHADER_COMPONENT_FLOAT: + sampler_type_prefix = ""; + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x for combined resource/sampler " + "for resource %u, space %u and sampler %u, space %u.", component_type, + crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); + sampler_type_prefix = ""; + break; + } + + if (!shader_glsl_get_combined_sampler_binding(gen, crs, srv->resource_type, &binding_idx)) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "No descriptor binding specified for combined resource/sampler " + "for resource %u, space %u and sampler %u, space %u.", + crs->resource_index, crs->resource_space, 
crs->sampler_index, crs->sampler_space); + return; + } + + binding = &gen->interface_info->combined_samplers[binding_idx].binding; + + if (binding->set != 0) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding set %u specified for combined resource/sampler " + "for resource %u, space %u and sampler %u, space %u.", binding->set, + crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); + return; + } + + if (binding->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding count %u specified for combined resource/sampler " + "for resource %u, space %u and sampler %u, space %u.", binding->count, + crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); + return; + } + + vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %ssampler%s%s ", + binding->binding, sampler_type_prefix, sampler_type, shadow ? "Shadow" : ""); + shader_glsl_print_combined_sampler_name(buffer, gen, crs->resource_index, + crs->resource_space, crs->sampler_index, crs->sampler_space); + vkd3d_string_buffer_printf(buffer, ";\n"); +} + +static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct vkd3d_shader_scan_combined_resource_sampler_info *sampler_info = gen->combined_sampler_info; + const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; + const struct vkd3d_shader_descriptor_info1 *descriptor; + unsigned int i; + + for (i = 0; i < info->descriptor_count; ++i) + { + descriptor = &info->descriptors[i]; + + switch (descriptor->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + /* GLSL uses combined resource/sampler descriptors.*/ + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + shader_glsl_generate_uav_declaration(gen, descriptor); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + 
shader_glsl_generate_cbv_declaration(gen, descriptor); + break; + + default: + vkd3d_string_buffer_printf(gen->buffer, "/* */\n", descriptor->type); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); + break; + } + } + for (i = 0; i < sampler_info->combined_sampler_count; ++i) + { + shader_glsl_generate_sampler_declaration(gen, &sampler_info->combined_samplers[i]); + } + if (info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, "\n"); +} + +static const struct signature_element *signature_get_element_by_location( + const struct shader_signature *signature, unsigned int location) +{ + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location != location) + continue; + + return e; + } + + return NULL; +} + +static const char *shader_glsl_get_interpolation(struct vkd3d_glsl_generator *gen, + const struct shader_signature *signature, const char *type, unsigned int location) +{ + enum vkd3d_shader_interpolation_mode m; + const struct signature_element *e; + + if ((e = signature_get_element_by_location(signature, location))) + m = e->interpolation_mode; + else + m = VKD3DSIM_NONE; + + switch (m) + { + case VKD3DSIM_NONE: + case VKD3DSIM_LINEAR: + return ""; + case VKD3DSIM_CONSTANT: + return "flat "; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x for %s location %u.", m, type, location); + return ""; + } +} + +static void shader_glsl_generate_interface_block(struct vkd3d_glsl_generator *gen, + const struct shader_signature *signature, const char *type, unsigned int count) +{ + struct vkd3d_string_buffer *buffer = gen->buffer; + const char *interpolation; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "%s shader_in_out\n{\n", type); + for (i = 0; i < count; 
++i) + { + interpolation = shader_glsl_get_interpolation(gen, signature, type, i); + vkd3d_string_buffer_printf(buffer, " %svec4 reg_%u;\n", interpolation, i); + } + vkd3d_string_buffer_printf(buffer, "} shader_%s;\n", type); +} + +static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->input_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i, count; + + if (!gen->interstage_input) + { + for (i = 0, count = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED || e->sysval_semantic) + continue; + + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); + continue; + } + + if (e->interpolation_mode != VKD3DSIM_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + continue; + } + + vkd3d_string_buffer_printf(buffer, "layout(location = %u) in ", e->target_location); + switch (e->component_type) + { + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, "uvec4"); + break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(buffer, "ivec4"); + break; + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, "vec4"); + break; + default: + vkd3d_string_buffer_printf(buffer, "", e->component_type); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled input component type %#x.", e->component_type); + break; + } + vkd3d_string_buffer_printf(buffer, " shader_in_%u;\n", i); + ++count; + } + if (count) + vkd3d_string_buffer_printf(buffer, "\n"); + } + else if 
(gen->limits.input_count) + { + shader_glsl_generate_interface_block(gen, signature, "in", gen->limits.input_count); + vkd3d_string_buffer_printf(buffer, "\n"); + } +} + +static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i, count; + + if (!gen->interstage_output) + { + for (i = 0, count = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + if (e->sysval_semantic != VKD3D_SHADER_SV_TARGET) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); + continue; + } + + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); + continue; + } + + if (e->interpolation_mode != VKD3DSIM_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + continue; + } + + vkd3d_string_buffer_printf(buffer, "layout(location = %u) out ", e->target_location); + switch (e->component_type) + { + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, "uvec4"); + break; + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, "vec4"); + break; + default: + vkd3d_string_buffer_printf(buffer, "", e->component_type); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output component type %#x.", e->component_type); + break; + } + vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", e->semantic_index); + ++count; + } + if (count) + 
vkd3d_string_buffer_printf(buffer, "\n"); + } + else if (gen->limits.output_count) + { + shader_glsl_generate_interface_block(gen, signature, "out", gen->limits.output_count); + vkd3d_string_buffer_printf(buffer, "\n"); + } +} + +static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct vsir_program *program = gen->program; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct vsir_thread_group_size *group_size; + + if (program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) + { + group_size = &program->thread_group_size; + vkd3d_string_buffer_printf(buffer, "layout(local_size_x = %u, local_size_y = %u, local_size_z = %u) in;\n\n", + group_size->x, group_size->y, group_size->z); + } + + shader_glsl_generate_descriptor_declarations(gen); + shader_glsl_generate_input_declarations(gen); + shader_glsl_generate_output_declarations(gen); + + if (gen->limits.input_count) + vkd3d_string_buffer_printf(buffer, "vec4 %s_in[%u];\n", gen->prefix, gen->limits.input_count); + if (gen->limits.output_count) + vkd3d_string_buffer_printf(buffer, "vec4 %s_out[%u];\n", gen->prefix, gen->limits.output_count); + if (program->temp_count) + vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n", program->temp_count); + vkd3d_string_buffer_printf(buffer, "\n"); +} + +static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) +{ + const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; + struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int i; + void *code; + + MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); + + vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + + shader_glsl_generate_declarations(gen); + + vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); + + ++gen->indent; + shader_glsl_shader_prologue(gen); + for (i = 0; i < instructions->count; ++i) + { + vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); + } + + vkd3d_string_buffer_printf(buffer, "}\n"); + + if (TRACE_ON()) + vkd3d_string_buffer_trace(buffer); + + if (gen->failed) + return VKD3D_ERROR_INVALID_SHADER; + + if ((code = vkd3d_malloc(buffer->buffer_size))) + { + memcpy(code, buffer->buffer, buffer->content_size); + out->size = buffer->content_size; + out->code = code; + } + else return VKD3D_ERROR_OUT_OF_MEMORY; + + return VKD3D_OK; +} + +static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) +{ + vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); + vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); +} + +static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_version *version) +{ + struct shader_limits *limits = &gen->limits; + + if (version->major < 4 || version->major >= 6) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader version %u.%u.", version->major, version->minor); + + switch (version->type) + { + case VKD3D_SHADER_TYPE_VERTEX: + limits->input_count = 32; + limits->output_count = 32; + break; + case VKD3D_SHADER_TYPE_PIXEL: + limits->input_count = 32; + limits->output_count = 8; + break; + case VKD3D_SHADER_TYPE_COMPUTE: + limits->input_count = 0; + limits->output_count = 0; + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", version->type); + limits->input_count = 0; + limits->output_count = 0; + break; + } +} + +static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + struct vsir_program *program, const struct 
vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_shader_type type = program->shader_version.type; + + memset(gen, 0, sizeof(*gen)); + gen->program = program; + vkd3d_string_buffer_cache_init(&gen->string_buffers); + gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); + gen->location.source_name = compile_info->source_name; + gen->message_context = message_context; + if (!(gen->prefix = shader_glsl_get_prefix(type))) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); + gen->prefix = "unknown"; + } + shader_glsl_init_limits(gen, &program->shader_version); + gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX && type != VKD3D_SHADER_TYPE_COMPUTE; + gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL && type != VKD3D_SHADER_TYPE_COMPUTE; + + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); + gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); + gen->descriptor_info = descriptor_info; + gen->combined_sampler_info = combined_sampler_info; +} + +int glsl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_glsl_generator generator; + int ret; + + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + + VKD3D_ASSERT(program->normalised_io); + VKD3D_ASSERT(program->normalised_hull_cp_io); - vkd3d_glsl_generator_init(&generator, 
program, message_context); + vkd3d_glsl_generator_init(&generator, program, compile_info, + descriptor_info, combined_sampler_info, message_context); ret = vkd3d_glsl_generator_generate(&generator, out); vkd3d_glsl_generator_cleanup(&generator); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index bd5baacd83d..6ad0117fd5c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -254,6 +254,46 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) } } +bool hlsl_type_is_shader(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return hlsl_type_is_shader(type->e.array.type); + + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_VERTEX_SHADER: + return true; + + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_VOID: + case HLSL_CLASS_NULL: + return false; + } + return false; +} + /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or * resources, since for both their data types span across a single regset. 
*/ static enum hlsl_regset type_get_regset(const struct hlsl_type *type) @@ -379,6 +419,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: @@ -455,6 +496,7 @@ static bool type_is_single_component(const struct hlsl_type *type) { case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_SCALAR: case HLSL_CLASS_SAMPLER: @@ -631,6 +673,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty break; case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: @@ -930,6 +973,7 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", + "geometryshader", "matrix", "pixelshader", "texture", @@ -1021,6 +1065,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: @@ -1115,6 +1160,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: @@ -1575,7 +1621,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -1589,6 +1634,16 @@ 
struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } +static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx) +{ + static const struct vkd3d_shader_location loc = {.source_name = ""}; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + + /* Use a dummy location; we should never report any messages related to + * this expression. */ + return hlsl_new_expr(ctx, HLSL_OP0_ERROR, operands, ctx->builtin_types.error, &loc); +} + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { @@ -1792,6 +1847,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned return &swizzle->node; } +struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, + const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, + struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_profile_info *profile_info = NULL; + struct hlsl_ir_compile *compile; + struct hlsl_type *type = NULL; + unsigned int i; + + switch (compile_type) + { + case HLSL_COMPILE_TYPE_COMPILE: + if (!(profile_info = hlsl_get_target_info(profile_name))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Unknown profile \"%s\".", profile_name); + return NULL; + } + + if (profile_info->type == VKD3D_SHADER_TYPE_PIXEL) + type = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); + else if (profile_info->type == VKD3D_SHADER_TYPE_VERTEX) + type = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); + + if (!type) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Invalid profile \"%s\".", profile_name); + return NULL; + } + + break; + + case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: + type = 
hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); + break; + } + + if (!(compile = hlsl_alloc(ctx, sizeof(*compile)))) + return NULL; + + init_node(&compile->node, HLSL_IR_COMPILE, type, loc); + + compile->compile_type = compile_type; + compile->profile = profile_info; + + hlsl_block_init(&compile->instrs); + hlsl_block_add_block(&compile->instrs, args_instrs); + + compile->args_count = args_count; + if (!(compile->args = hlsl_alloc(ctx, sizeof(*compile->args) * args_count))) + { + vkd3d_free(compile); + return NULL; + } + for (i = 0; i < compile->args_count; ++i) + hlsl_src_from_node(&compile->args[i], args[i]); + + return &compile->node; +} + +bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, + struct hlsl_state_block_entry *entry) +{ + if (!vkd3d_array_reserve((void **)&state_block->entries, + &state_block->capacity, state_block->count + 1, + sizeof(*state_block->entries))) + return false; + + state_block->entries[state_block->count++] = entry; + return true; +} + +struct hlsl_ir_node *hlsl_new_sampler_state(struct hlsl_ctx *ctx, + const struct hlsl_state_block *state_block, struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_sampler_state *sampler_state; + struct hlsl_type *type = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; + + if (!(sampler_state = hlsl_alloc(ctx, sizeof(*sampler_state)))) + return NULL; + + init_node(&sampler_state->node, HLSL_IR_SAMPLER_STATE, type, loc); + + if (!(sampler_state->state_block = hlsl_alloc(ctx, sizeof(*sampler_state->state_block)))) + { + vkd3d_free(sampler_state); + return NULL; + } + + if (state_block) + { + for (unsigned int i = 0; i < state_block->count; ++i) + { + const struct hlsl_state_block_entry *src = state_block->entries[i]; + struct hlsl_state_block_entry *entry; + + if (!(entry = clone_stateblock_entry(ctx, src, src->name, src->lhs_has_index, src->lhs_index, false, 0))) + { + hlsl_free_instr(&sampler_state->node); + return NULL; + } + + if 
(!hlsl_state_block_add_entry(sampler_state->state_block, entry)) + { + hlsl_free_instr(&sampler_state->node); + return NULL; + } + } + } + + return &sampler_state->node; +} + struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc) { @@ -2142,6 +2309,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } +static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_compile *compile) +{ + const char *profile_name = NULL; + struct hlsl_ir_node **args; + struct hlsl_ir_node *node; + struct hlsl_block block; + unsigned int i; + + if (!(clone_block(ctx, &block, &compile->instrs, map))) + return NULL; + + if (!(args = hlsl_alloc(ctx, sizeof(*args) * compile->args_count))) + { + hlsl_block_cleanup(&block); + return NULL; + } + for (i = 0; i < compile->args_count; ++i) + { + args[i] = map_instr(map, compile->args[i].node); + VKD3D_ASSERT(args[i]); + } + + if (compile->profile) + profile_name = compile->profile->name; + + if (!(node = hlsl_new_compile(ctx, compile->compile_type, profile_name, + args, compile->args_count, &block, &compile->node.loc))) + { + hlsl_block_cleanup(&block); + vkd3d_free(args); + return NULL; + } + + vkd3d_free(args); + return node; +} + +static struct hlsl_ir_node *clone_sampler_state(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_sampler_state *sampler_state) +{ + return hlsl_new_sampler_state(ctx, sampler_state->state_block, + &sampler_state->node.loc); +} + static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) { @@ -2149,8 +2361,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, } struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, - struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, - 
unsigned int lhs_index, unsigned int arg_index) + const struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, + unsigned int lhs_index, bool single_arg, unsigned int arg_index) { struct hlsl_state_block_entry *entry; struct clone_instr_map map = { 0 }; @@ -2166,7 +2378,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, return NULL; } - entry->args_count = 1; + if (single_arg) + entry->args_count = 1; + else + entry->args_count = src->args_count; + if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) { hlsl_free_state_block_entry(entry); @@ -2179,7 +2395,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, hlsl_free_state_block_entry(entry); return NULL; } - clone_src(&map, entry->args, &src->args[arg_index]); + + if (single_arg) + { + clone_src(&map, entry->args, &src->args[arg_index]); + } + else + { + for (unsigned int i = 0; i < src->args_count; ++i) + clone_src(&map, &entry->args[i], &src->args[i]); + } vkd3d_free(map.instrs); return entry; @@ -2284,6 +2509,12 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + case HLSL_IR_COMPILE: + return clone_compile(ctx, map, hlsl_ir_compile(instr)); + + case HLSL_IR_SAMPLER_STATE: + return clone_sampler_state(ctx, map, hlsl_ir_sampler_state(instr)); + case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); } @@ -2314,6 +2545,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; + list_init(&decl->extern_vars); if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) { @@ -2570,6 +2802,10 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } return string; + case HLSL_CLASS_ERROR: + vkd3d_string_buffer_printf(string, ""); + 
return string; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -2698,6 +2934,9 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_STORE ] = "HLSL_IR_STORE", [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", + [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", }; @@ -2907,6 +3146,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) { static const char *const op_names[] = { + [HLSL_OP0_ERROR] = "error", [HLSL_OP0_VOID] = "void", [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", @@ -3146,6 +3386,40 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); } +static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_compile *compile) +{ + unsigned int i; + + switch (compile->compile_type) + { + case HLSL_COMPILE_TYPE_COMPILE: + vkd3d_string_buffer_printf(buffer, "compile %s {\n", compile->profile->name); + break; + + case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: + vkd3d_string_buffer_printf(buffer, "ConstructGSWithSO {\n"); + break; + } + + dump_block(ctx, buffer, &compile->instrs); + + vkd3d_string_buffer_printf(buffer, " %10s } (", ""); + for (i = 0; i < compile->args_count; ++i) + { + dump_src(buffer, &compile->args[i]); + if (i + 1 < compile->args_count) + vkd3d_string_buffer_printf(buffer, ", "); + } + vkd3d_string_buffer_printf(buffer, ")"); +} + +static void dump_ir_sampler_state(struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_sampler_state *sampler_state) +{ + vkd3d_string_buffer_printf(buffer, "sampler_state {...}"); +} + static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_stateblock_constant *constant) { @@ -3245,6 +3519,14 @@ static void 
dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break; + case HLSL_IR_COMPILE: + dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); + break; + + case HLSL_IR_SAMPLER_STATE: + dump_ir_sampler_state(buffer, hlsl_ir_sampler_state(instr)); + break; + case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; @@ -3308,8 +3590,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { struct hlsl_src *src, *next; - VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); - VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy); + VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx); + VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy); LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) { @@ -3459,6 +3741,24 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); } +static void free_ir_compile(struct hlsl_ir_compile *compile) +{ + unsigned int i; + + for (i = 0; i < compile->args_count; ++i) + hlsl_src_remove(&compile->args[i]); + + hlsl_block_cleanup(&compile->instrs); + vkd3d_free(compile); +} + +static void free_ir_sampler_state(struct hlsl_ir_sampler_state *sampler_state) +{ + if (sampler_state->state_block) + hlsl_free_state_block(sampler_state->state_block); + vkd3d_free(sampler_state); +} + static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) { vkd3d_free(constant->name); @@ -3527,6 +3827,14 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_switch(hlsl_ir_switch(node)); break; + case HLSL_IR_COMPILE: + free_ir_compile(hlsl_ir_compile(node)); + break; + + case HLSL_IR_SAMPLER_STATE: + free_ir_sampler_state(hlsl_ir_sampler_state(node)); + break; + case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); 
break; @@ -3801,12 +4109,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) static const char * const names[] = { - "float", - "half", - "double", - "int", - "uint", - "bool", + [HLSL_TYPE_FLOAT] = "float", + [HLSL_TYPE_HALF] = "half", + [HLSL_TYPE_DOUBLE] = "double", + [HLSL_TYPE_INT] = "int", + [HLSL_TYPE_UINT] = "uint", + [HLSL_TYPE_BOOL] = "bool", }; static const char *const variants_float[] = {"min10float", "min16float"}; @@ -3957,6 +4265,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); + ctx->builtin_types.error = hlsl_new_simple_type(ctx, "", HLSL_CLASS_ERROR); hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); @@ -4059,6 +4368,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; + ctx->double_as_float_alias = option->value & VKD3D_SHADER_COMPILE_OPTION_DOUBLE_AS_FLOAT_ALIAS; break; case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: @@ -4078,6 +4388,15 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil } } + if (!(ctx->error_instr = hlsl_new_error_expr(ctx))) + return false; + hlsl_block_add_instr(&ctx->static_initializers, ctx->error_instr); + + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + ctx->output_control_point_count = UINT_MAX; + ctx->output_primitive = 0; + ctx->partitioning = 0; + return true; } @@ -4089,8 
+4408,6 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) struct hlsl_type *type, *next_type; unsigned int i; - hlsl_block_cleanup(&ctx->static_initializers); - for (i = 0; i < ctx->source_files_count; ++i) vkd3d_free((void *)ctx->source_files[i]); vkd3d_free(ctx->source_files); @@ -4098,6 +4415,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) rb_destroy(&ctx->functions, free_function_rb, NULL); + hlsl_block_cleanup(&ctx->static_initializers); + /* State blocks must be free before the variables, because they contain instructions that may * refer to them. */ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 22e25b23988..efe3aec024b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ #include "vkd3d_shader_private.h" #include "wine/rbtree.h" -#include "d3dcommon.h" #include "d3dx9shader.h" /* The general IR structure is inspired by Mesa GLSL hir, even though the code @@ -70,6 +69,14 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; } +static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) +{ + return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), + hlsl_swizzle_get_component(swizzle, 1), + hlsl_swizzle_get_component(swizzle, 2), + hlsl_swizzle_get_component(swizzle, 3)); +} + enum hlsl_type_class { HLSL_CLASS_SCALAR, @@ -99,6 +106,7 @@ enum hlsl_type_class HLSL_CLASS_BLEND_STATE, HLSL_CLASS_VOID, HLSL_CLASS_NULL, + HLSL_CLASS_ERROR, }; enum hlsl_base_type @@ -316,6 +324,9 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + + HLSL_IR_COMPILE, + HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, }; @@ -352,6 +363,9 @@ struct hlsl_block { /* List containing instruction nodes; linked by the hlsl_ir_node.entry fields. 
*/ struct list instrs; + /* Instruction representing the "value" of this block, if applicable. + * This may point to an instruction outside of this block! */ + struct hlsl_ir_node *value; }; /* A reference to an instruction node (struct hlsl_ir_node), usable as a field in other structs. @@ -474,6 +488,8 @@ struct hlsl_ir_var * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 * means function entry. */ unsigned int first_write, last_read; + /* Whether the variable is read in any entry function. */ + bool is_read; /* Offset where the variable's value is stored within its buffer in numeric register components. * This in case the variable is uniform. */ unsigned int buffer_offset; @@ -591,10 +607,18 @@ struct hlsl_ir_function_decl unsigned int attr_count; const struct hlsl_attribute *const *attrs; + bool early_depth_test; + /* Synthetic boolean variable marking whether a return statement has been * executed. Needed to deal with return statements in non-uniform control * flow, since some backends can't handle them. */ struct hlsl_ir_var *early_return_var; + + /* List of all the extern semantic variables; linked by the + * hlsl_ir_var.extern_entry fields. This exists as a convenience because + * it is often necessary to iterate all extern variables and these can be + * declared in as function parameters, or as the function return value. */ + struct list extern_vars; }; struct hlsl_ir_call @@ -646,6 +670,7 @@ struct hlsl_ir_switch enum hlsl_ir_expr_op { + HLSL_OP0_ERROR, HLSL_OP0_VOID, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, @@ -703,7 +728,7 @@ enum hlsl_ir_expr_op HLSL_OP2_SLT, /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, - * then adds c. */ + * then adds c, where c must have dimx=1. */ HLSL_OP3_DP2ADD, /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. 
*/ @@ -854,6 +879,43 @@ struct hlsl_ir_string_constant char *string; }; +/* Represents shader compilation call for effects, such as "CompileShader()". + * + * Unlike hlsl_ir_call, it is not flattened, thus, it keeps track of its + * arguments and maintains its own instruction block. */ +struct hlsl_ir_compile +{ + struct hlsl_ir_node node; + + enum hlsl_compile_type + { + /* A shader compilation through the CompileShader() function or the "compile" syntax. */ + HLSL_COMPILE_TYPE_COMPILE, + /* A call to ConstructGSWithSO(), which receives a geometry shader and retrieves one as well. */ + HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, + } compile_type; + + /* Special field to store the profile argument for HLSL_COMPILE_TYPE_COMPILE. */ + const struct hlsl_profile_info *profile; + + /* Block containing the instructions required by the arguments of the + * compilation call. */ + struct hlsl_block instrs; + + /* Arguments to the compilation call. For HLSL_COMPILE_TYPE_COMPILE + * args[0] is an hlsl_ir_call to the specified function. */ + struct hlsl_src *args; + unsigned int args_count; +}; + +/* Represents a state block initialized with the "sampler_state" keyword. */ +struct hlsl_ir_sampler_state +{ + struct hlsl_ir_node node; + + struct hlsl_state_block *state_block; +}; + /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ struct hlsl_ir_stateblock_constant @@ -965,10 +1027,11 @@ struct hlsl_ctx struct hlsl_scope *dummy_scope; /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ struct list scopes; - /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields. - * This exists as a convenience because it is often necessary to iterate all extern variables - * and these can be declared in global scope, as function parameters, or as the function - * return value. 
*/ + + /* List of all the extern variables, excluding semantic variables; linked + * by the hlsl_ir_var.extern_entry fields. This exists as a convenience + * because it is often necessary to iterate all extern variables declared + * in the global scope or as function parameters. */ struct list extern_vars; /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared @@ -1003,8 +1066,12 @@ struct hlsl_ctx struct hlsl_type *string; struct hlsl_type *Void; struct hlsl_type *null; + struct hlsl_type *error; } builtin_types; + /* Pre-allocated "error" expression. */ + struct hlsl_ir_node *error_instr; + /* List of the instruction nodes for initializing static variables. */ struct hlsl_block static_initializers; @@ -1016,19 +1083,23 @@ struct hlsl_ctx { uint32_t index; struct hlsl_vec4 value; + struct vkd3d_shader_location loc; } *regs; size_t count, size; } constant_defs; /* 'c' registers where the constants expected by SM2 sincos are stored. */ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register - * index that will be used in the output bytecode (+1). */ - uint32_t temp_count; /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ uint32_t thread_count[3]; + enum vkd3d_tessellator_domain domain; + unsigned int output_control_point_count; + enum vkd3d_shader_tessellator_output_primitive output_primitive; + enum vkd3d_shader_tessellator_partitioning partitioning; + struct hlsl_ir_function_decl *patch_constant_func; + /* In some cases we generate opcodes by parsing an HLSL function and then * invoking it. 
If not NULL, this field is the name of the function that we * are currently parsing, "mangled" with an internal prefix to avoid @@ -1044,6 +1115,7 @@ struct hlsl_ctx bool child_effect; bool include_empty_buffers; bool warn_implicit_truncation; + bool double_as_float_alias; }; static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) @@ -1149,6 +1221,18 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } +static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); + return CONTAINING_RECORD(node, struct hlsl_ir_compile, node); +} + +static inline struct hlsl_ir_sampler_state *hlsl_ir_sampler_state(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_SAMPLER_STATE); + return CONTAINING_RECORD(node, struct hlsl_ir_sampler_state, node); +}; + static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); @@ -1158,16 +1242,19 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); + block->value = NULL; } static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) { list_add_tail(&block->instrs, &instr->entry); + block->value = (instr->data_type ? 
instr : NULL); } static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) { list_move_tail(&block->instrs, &add->instrs); + block->value = add->value; } static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) @@ -1330,13 +1417,19 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); +bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, + struct hlsl_state_block_entry *entry); bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, const struct vkd3d_shader_location *loc); struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, - struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, - unsigned int lhs_index, unsigned int arg_index); + const struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, + unsigned int lhs_index, bool single_arg, unsigned int arg_index); +void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); +uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); +void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); +void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); @@ -1428,6 +1521,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); bool 
hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); +struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, + const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, + struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, @@ -1440,6 +1536,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_sampler_state(struct hlsl_ctx *ctx, + const struct hlsl_state_block *state_block, struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, @@ -1493,6 +1591,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); +bool hlsl_type_is_shader(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); @@ -1525,23 +1624,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); -bool 
hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, - unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +int tpf_compile(struct vsir_program *program, uint64_t config_flags, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_shader_register_type *type, bool *has_idx); -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); - struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 0c02b27817e..97d8b13772b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -80,7 +80,9 @@ centroid {return KW_CENTROID; } column_major {return KW_COLUMN_MAJOR; } ComputeShader {return KW_COMPUTESHADER; } compile {return KW_COMPILE; } +CompileShader {return KW_COMPILESHADER; } const {return KW_CONST; } +ConstructGSWithSO {return 
KW_CONSTRUCTGSWITHSO; } continue {return KW_CONTINUE; } DepthStencilState {return KW_DEPTHSTENCILSTATE; } DepthStencilView {return KW_DEPTHSTENCILVIEW; } @@ -88,7 +90,6 @@ default {return KW_DEFAULT; } discard {return KW_DISCARD; } DomainShader {return KW_DOMAINSHADER; } do {return KW_DO; } -double {return KW_DOUBLE; } else {return KW_ELSE; } export {return KW_EXPORT; } extern {return KW_EXTERN; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 3f319dea0d8..213cec79c3d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -40,6 +40,7 @@ struct parse_initializer unsigned int args_count; struct hlsl_block *instrs; bool braces; + struct vkd3d_shader_location loc; }; struct parse_parameter @@ -147,7 +148,7 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) { - return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); + return block->value; } static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) @@ -437,6 +438,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct if (hlsl_types_are_equal(src_type, dst_type)) return node; + if (node->type == HLSL_IR_SAMPLER_STATE && dst_type->class == HLSL_CLASS_SAMPLER) + return node; + if (!implicit_compatible_data_types(ctx, src_type, dst_type)) { struct vkd3d_string_buffer *src_string, *dst_string; @@ -458,6 +462,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct return add_cast(ctx, block, node, dst_type, loc); } +static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *dst_type, const struct parse_array_sizes *arrays, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *instr = node_from_block(block); + struct hlsl_type *src_type = instr->data_type; + unsigned int i; + + for (i = 0; i < 
arrays->count; ++i) + { + if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); + dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); + } + + if (instr->data_type->class == HLSL_CLASS_ERROR) + return true; + + if (!explicit_compatible_data_types(ctx, src_type, dst_type)) + { + struct vkd3d_string_buffer *src_string, *dst_string; + + src_string = hlsl_type_to_string(ctx, src_type); + dst_string = hlsl_type_to_string(ctx, dst_type); + if (src_string && dst_string) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", + src_string->buffer, dst_string->buffer); + hlsl_release_string_buffer(ctx, src_string); + hlsl_release_string_buffer(ctx, dst_string); + return false; + } + + return add_cast(ctx, block, instr, dst_type, loc); +} + static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, const struct vkd3d_shader_location *loc) { @@ -489,9 +527,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co check_condition_type(ctx, condition); bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); - if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc))) + /* We already checked for a 1-component numeric type, so + * add_implicit_conversion() is equivalent to add_cast() here. 
*/ + if (!(cast = add_cast(ctx, cond_block, condition, bool_type, &condition->loc))) return false; - hlsl_block_add_instr(cond_block, cast); if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) return false; @@ -516,7 +555,7 @@ enum loop_type LOOP_DO_WHILE }; -static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) +static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) { unsigned int i, j; @@ -525,11 +564,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att for (j = i + 1; j < attrs->count; ++j) { if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) - return true; + hlsl_error(ctx, &attrs->attrs[j]->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Found duplicate attribute \"%s\".", attrs->attrs[j]->name); } } - - return false; } static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, @@ -610,8 +648,10 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx { switch (node->type) { + case HLSL_IR_COMPILE: case HLSL_IR_CONSTANT: case HLSL_IR_EXPR: + case HLSL_IR_SAMPLER_STATE: case HLSL_IR_STRING_CONSTANT: case HLSL_IR_SWIZZLE: case HLSL_IR_LOAD: @@ -639,14 +679,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx return ret; hlsl_block_add_block(&expr, block); - if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) + if (!(node = add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc))) { hlsl_block_cleanup(&expr); return ret; } /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ - hlsl_src_from_node(&src, node_from_block(&expr)); + hlsl_src_from_node(&src, node); + hlsl_lower_index_loads(ctx, &expr); hlsl_run_const_passes(ctx, &expr); node = src.node; hlsl_src_remove(&src); @@ -697,9 +738,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; - if (attribute_list_has_duplicates(attributes)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); - + check_attribute_list_for_duplicates(ctx, attributes); check_loop_attributes(ctx, attributes, loc); /* Ignore unroll(0) attribute, and any invalid attribute. */ @@ -974,6 +1013,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; struct hlsl_ir_node *return_index, *cast; + if (array->data_type->class == HLSL_CLASS_ERROR || index->data_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; + return true; + } + if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { @@ -1164,6 +1209,32 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, return true; } +static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + const char *name, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *record = node_from_block(block); + const struct hlsl_type *type = record->data_type; + const struct hlsl_struct_field *field, *base; + + if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name))) + { + unsigned int field_idx = field - type->e.record.fields; + + return add_record_access(ctx, block, record, field_idx, loc); + } + else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super"))) + { + unsigned int base_idx = base - 
type->e.record.fields; + + if (!add_record_access(ctx, block, record, base_idx, loc)) + return false; + return add_record_access_recurse(ctx, block, name, loc); + } + + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); + return false; +} + static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) { struct parse_variable_def *v, *v_next; @@ -1227,7 +1298,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, } static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, + bool is_default_values_initializer); static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, struct parse_parameter *param, const struct vkd3d_shader_location *loc) @@ -1285,7 +1357,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters for (i = 0; i < param->initializer.args_count; ++i) { - initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); + initialize_var_components(ctx, param->initializer.instrs, var, + &store_index, param->initializer.args[i], true); } free_parse_initializer(¶m->initializer); @@ -1673,25 +1746,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl return expr; } -static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +static bool type_is_integer(enum hlsl_base_type type) { - const struct hlsl_type *type = instr->data_type; - struct vkd3d_string_buffer *string; - - switch (type->e.numeric.type) + switch (type) { case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - break; + return true; - default: - if ((string = hlsl_type_to_string(ctx, type))) - hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Expression type '%s' is not integer.", string->buffer); - hlsl_release_string_buffer(ctx, string); - break; + case HLSL_TYPE_DOUBLE: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return false; } + + vkd3d_unreachable(); +} + +static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +{ + const struct hlsl_type *type = instr->data_type; + struct vkd3d_string_buffer *string; + + if (type_is_integer(type->e.numeric.type)) + return; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Expression type '%s' is not integer.", string->buffer); + hlsl_release_string_buffer(ctx, string); } static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1699,12 +1783,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; + if (arg->data_type->class == HLSL_CLASS_ERROR) + return arg; + return add_expr(ctx, block, op, args, arg->data_type, loc); } static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { + if (arg->data_type->class == HLSL_CLASS_ERROR) + return arg; + check_integer_type(ctx, arg); return add_unary_arithmetic_expr(ctx, block, op, arg, loc); @@ -1716,6 +1806,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type; + if (arg->data_type->class == HLSL_CLASS_ERROR) + return arg; + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy); @@ -1745,7 +1838,11 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; 
struct hlsl_type *common_type; - common_type = get_common_numeric_type(ctx, arg1, arg2, loc); + if (!(common_type = get_common_numeric_type(ctx, arg1, arg2, loc))) + { + block->value = ctx->error_instr; + return block->value; + } if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL; @@ -1942,6 +2039,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls hlsl_block_add_block(block1, block2); destroy_block(block2); + if (arg1->data_type->class == HLSL_CLASS_ERROR || arg2->data_type->class == HLSL_CLASS_ERROR) + { + block1->value = ctx->error_instr; + return block1; + } + if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) return NULL; @@ -2048,18 +2151,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un return true; } -static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, +static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_node *copy; unsigned int writemask = 0, width = 0; bool matrix_writemask = false; + if (lhs->data_type->class == HLSL_CLASS_ERROR || rhs->data_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; + return true; + } + if (assign_op == ASSIGN_OP_SUB) { if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) - return NULL; + return false; assign_op = ASSIGN_OP_ADD; } if (assign_op != ASSIGN_OP_ASSIGN) @@ -2068,7 +2176,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo VKD3D_ASSERT(op); if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return NULL; + return false; } if (hlsl_is_numeric_type(lhs_type)) @@ -2078,14 +2186,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo } if 
(!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; + return false; while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { hlsl_fixme(ctx, &lhs->loc, "Cast on the LHS."); - return NULL; + return false; } else if (lhs->type == HLSL_IR_SWIZZLE) { @@ -2100,25 +2208,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) { hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); - return NULL; + return false; } if (!invert_swizzle_matrix(&s, &writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); - return NULL; + return false; } matrix_writemask = true; } else if (!invert_swizzle(&s, &writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return NULL; + return false; } if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) - { - return NULL; - } + return false; hlsl_block_add_instr(block, new_swizzle); lhs = swizzle->val.node; @@ -2127,7 +2233,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo else { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue."); - return NULL; + return false; } } @@ -2142,11 +2248,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) { hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource store."); - return NULL; + return false; } if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) - return NULL; + return false; resource_type = hlsl_deref_get_type(ctx, &resource_deref); VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || 
resource_type->class == HLSL_CLASS_UAV); @@ -2168,7 +2274,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) { hlsl_cleanup_deref(&resource_deref); - return NULL; + return false; } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); @@ -2195,13 +2301,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) { hlsl_cleanup_deref(&deref); - return NULL; + return false; } if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) { hlsl_cleanup_deref(&deref); - return NULL; + return false; } hlsl_block_add_block(block, &store_block); } @@ -2226,23 +2332,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo continue; if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) - return NULL; + return false; hlsl_block_add_instr(block, c); if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) - return NULL; + return false; hlsl_block_add_instr(block, cell); if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) - return NULL; + return false; if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) - return NULL; + return false; if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) { hlsl_cleanup_deref(&deref); - return NULL; + return false; } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); @@ -2254,24 +2360,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo struct hlsl_deref deref; if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) - return NULL; + return false; if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) { hlsl_cleanup_deref(&deref); - return NULL; + return false; } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); } - /* Don't use 
the instruction itself as a source, as this makes structure - * splitting easier. Instead copy it here. Since we retrieve sources from - * the last instruction in the list, we do need to copy. */ - if (!(copy = hlsl_new_copy(ctx, rhs))) - return NULL; - hlsl_block_add_instr(block, copy); - return copy; + block->value = rhs; + return true; } static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, @@ -2280,6 +2381,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d struct hlsl_ir_node *lhs = node_from_block(block); struct hlsl_ir_node *one; + if (lhs->data_type->class == HLSL_CLASS_ERROR) + return true; + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); @@ -2307,57 +2411,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d return true; } -/* For some reason, for matrices, values from default value initializers end up in different - * components than from regular initializers. Default value initializers fill the matrix in - * vertical reading order (left-to-right top-to-bottom) instead of regular reading order - * (top-to-bottom left-to-right), so they have to be adjusted. - * An exception is that the order of matrix initializers for function parameters are row-major - * (top-to-bottom left-to-right). 
*/ -static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, - struct hlsl_type *type, unsigned int index) -{ - unsigned int element_comp_count, element, x, y, i; - unsigned int base = 0; - - if (ctx->profile->major_version < 4) - return index; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) - return index; - - switch (type->class) - { - case HLSL_CLASS_MATRIX: - x = index / type->dimy; - y = index % type->dimy; - return y * type->dimx + x; - - case HLSL_CLASS_ARRAY: - element_comp_count = hlsl_type_component_count(type->e.array.type); - element = index / element_comp_count; - base = element * element_comp_count; - return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); - - case HLSL_CLASS_STRUCT: - for (i = 0; i < type->e.record.field_count; ++i) - { - struct hlsl_type *field_type = type->e.record.fields[i].type; - - element_comp_count = hlsl_type_component_count(field_type); - if (index - base < element_comp_count) - return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); - base += element_comp_count; - } - break; - - default: - return index; - } - vkd3d_unreachable(); -} - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, + bool is_default_values_initializer) { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); struct hlsl_deref dst_deref; @@ -2376,38 +2432,107 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - if (dst->default_values) + if (is_default_values_initializer) { struct hlsl_default_value default_value = {0}; - unsigned int dst_index; - - if (!hlsl_clone_block(ctx, &block, instrs)) - return; 
- default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - if (dst->is_param) - dst_index = *store_index; + if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) + { + if (hlsl_is_numeric_type(dst_comp_type)) + { + /* Default values are discarded if they contain an object + * literal expression for a numeric component. */ + if (dst->default_values) + { + hlsl_warning(ctx, &src->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE, + "Component %u in variable '%s' initializer is object literal. Default values discarded.", + k, dst->name); + vkd3d_free(dst->default_values); + dst->default_values = NULL; + } + } + } else - dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); + { + if (!hlsl_clone_block(ctx, &block, instrs)) + return; + default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - dst->default_values[dst_index] = default_value; + if (dst->default_values) + dst->default_values[*store_index] = default_value; - hlsl_block_cleanup(&block); + hlsl_block_cleanup(&block); + } } else { - if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) - return; + if (src->type == HLSL_IR_SAMPLER_STATE) + { + /* Sampler states end up in the variable's state_blocks instead of + * being used to initialize its value. 
*/ + struct hlsl_ir_sampler_state *sampler_state = hlsl_ir_sampler_state(src); - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; - hlsl_block_add_block(instrs, &block); + if (dst_comp_type->class != HLSL_CLASS_SAMPLER) + { + struct vkd3d_string_buffer *dst_string; + + dst_string = hlsl_type_to_string(ctx, dst_comp_type); + if (dst_string) + hlsl_error(ctx, &src->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot assign sampler_state to %s.", dst_string->buffer); + hlsl_release_string_buffer(ctx, dst_string); + return; + } + + if (!hlsl_array_reserve(ctx, (void **)&dst->state_blocks, &dst->state_block_capacity, + dst->state_block_count + 1, sizeof(*dst->state_blocks))) + return; + + dst->state_blocks[dst->state_block_count] = sampler_state->state_block; + sampler_state->state_block = NULL; + ++dst->state_block_count; + } + else + { + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) + return; + + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) + return; + hlsl_block_add_block(instrs, &block); + } } ++*store_index; } } +static void initialize_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *dst, + const struct parse_initializer *initializer, bool is_default_values_initializer) +{ + unsigned int store_index = 0; + + /* If any of the elements has an error type, then initializer_size() is not + * meaningful. 
*/ + for (unsigned int i = 0; i < initializer->args_count; ++i) + { + if (initializer->args[i]->data_type->class == HLSL_CLASS_ERROR) + return; + } + + if (initializer_size(initializer) != hlsl_type_component_count(dst->data_type)) + { + hlsl_error(ctx, &initializer->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", + hlsl_type_component_count(dst->data_type), initializer_size(initializer)); + return; + } + + for (unsigned int i = 0; i < initializer->args_count; ++i) + initialize_var_components(ctx, initializer->instrs, dst, &store_index, + initializer->args[i], is_default_values_initializer); +} + static bool type_has_object_components(const struct hlsl_type *type) { if (type->class == HLSL_CLASS_ARRAY) @@ -2733,13 +2858,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var if (v->initializer.args_count) { - unsigned int store_index = 0; bool is_default_values_initializer; - unsigned int size, k; is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) || ctx->cur_scope->annotations; + if (hlsl_get_multiarray_element_type(type)->class == HLSL_CLASS_SAMPLER) + is_default_values_initializer = false; + if (hlsl_type_is_shader(type)) + is_default_values_initializer = false; if (is_default_values_initializer) { @@ -2769,19 +2896,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->initializer.args[0] = node_from_block(v->initializer.instrs); } - size = initializer_size(&v->initializer); - if (component_count != size) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in initializer, but got %u.", component_count, size); - free_parse_variable_def(v); - continue; - } - - for (k = 0; k < v->initializer.args_count; ++k) - { - initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); - } + 
initialize_var(ctx, var, &v->initializer, is_default_values_initializer); if (is_default_values_initializer) { @@ -2795,6 +2910,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var { hlsl_block_add_block(initializers, v->initializer.instrs); } + + if (var->state_blocks) + TRACE("Variable %s has %u state blocks.\n", var->name, var->state_block_count); } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -2835,28 +2953,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var return initializers; } -static bool func_is_compatible_match(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *decl, const struct parse_initializer *args) +static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *decl, + bool is_compile, const struct parse_initializer *args) { - unsigned int i; + unsigned int i, k; - if (decl->parameters.count < args->args_count) - return false; - - for (i = 0; i < args->args_count; ++i) + k = 0; + for (i = 0; i < decl->parameters.count; ++i) { - if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) + if (is_compile && !(decl->parameters.vars[i]->storage_modifiers & HLSL_STORAGE_UNIFORM)) + continue; + + if (k >= args->args_count) + { + if (!decl->parameters.vars[i]->default_values) + return false; + return true; + } + + if (!implicit_compatible_data_types(ctx, args->args[k]->data_type, decl->parameters.vars[i]->data_type)) return false; - } - if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) + ++k; + } + if (k < args->args_count) return false; - return true; } static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - const char *name, const struct parse_initializer *args, + const char *name, const struct parse_initializer *args, bool is_compile, const struct vkd3d_shader_location *loc) { struct hlsl_ir_function_decl 
*decl, *compatible_match = NULL; @@ -2869,7 +2995,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { - if (func_is_compatible_match(ctx, decl, args)) + if (func_is_compatible_match(ctx, decl, is_compile, args)) { if (compatible_match) { @@ -2890,26 +3016,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); } -static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, - const struct parse_initializer *args, const struct vkd3d_shader_location *loc) +static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, const struct parse_initializer *args, + bool is_compile, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *call; - unsigned int i, j; + unsigned int i, j, k; VKD3D_ASSERT(args->args_count <= func->parameters.count); - for (i = 0; i < args->args_count; ++i) + k = 0; + for (i = 0; i < func->parameters.count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; - struct hlsl_ir_node *arg = args->args[i]; + struct hlsl_ir_node *arg; + + if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM)) + continue; + + if (k >= args->args_count) + break; + arg = args->args[k]; if (!hlsl_types_are_equal(arg->data_type, param->data_type)) { struct hlsl_ir_node *cast; if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) - return false; - args->args[i] = cast; + return NULL; + args->args[k] = cast; arg = cast; } @@ -2918,13 +3053,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu struct hlsl_ir_node *store; if (!(store = hlsl_new_simple_store(ctx, param, arg))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, store); } + + ++k; } /* Add default values for the remaining 
parameters. */ - for (i = args->args_count; i < func->parameters.count; ++i) + for (; i < func->parameters.count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; unsigned int comp_count = hlsl_type_component_count(param->data_type); @@ -2932,6 +3069,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu VKD3D_ASSERT(param->default_values); + if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM)) + continue; + hlsl_init_simple_deref_from_var(¶m_deref, param); for (j = 0; j < comp_count; ++j) @@ -2945,20 +3085,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu { value.u[0] = param->default_values[j].number; if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, comp); if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) - return false; + return NULL; hlsl_block_add_block(args->instrs, &store_block); } } } if (!(call = hlsl_new_call(ctx, func, loc))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, call); + if (is_compile) + return call; + for (i = 0; i < args->args_count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; @@ -2973,11 +3116,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu "Output argument to \"%s\" is const.", func->func->name); if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, &load->node); if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) - return false; + return NULL; } } @@ -2998,7 +3141,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu hlsl_block_add_instr(args->instrs, expr); } - return true; + return call; } static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, @@ -3006,7 +3149,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct 
hlsl_ctx *ctx, { struct hlsl_type *type = arg->data_type; - if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) + if (!type_is_integer(type->e.numeric.type)) return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); @@ -3094,14 +3237,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type base_type; struct hlsl_type *type; if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - - base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; - type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); + if (type_is_integer(type->e.numeric.type)) + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return convert_args(ctx, params, type, loc); } @@ -3129,6 +3270,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) { struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *arg; struct hlsl_type *type; char *body; @@ -3152,8 +3294,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const char *fn_name = asin_mode ? 
fn_name_asin : fn_name_acos; - type = params->args[0]->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + type = arg->data_type; if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, fn_name, type->name, @@ -3165,7 +3308,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_acos(struct hlsl_ctx *ctx, @@ -3282,9 +3425,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, " : poly_approx;\n" "}"; - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = params->args[0]->data_type; if (!(buf = hlsl_get_string_buffer(ctx))) return false; @@ -3314,7 +3457,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_atan(struct hlsl_ctx *ctx, @@ -3507,7 +3650,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_cosh(struct hlsl_ctx *ctx, @@ -3525,9 +3668,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, struct hlsl_type *cast_type; enum hlsl_base_type base; - if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) - base = HLSL_TYPE_HALF; - else + base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); + if (type_is_integer(base)) base = HLSL_TYPE_FLOAT; cast_type = 
hlsl_get_vector_type(ctx, base, 3); @@ -3698,15 +3840,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, return false; } + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + dim = min(type->dimx, type->dimy); if (dim == 1) - { - if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) - return false; return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); - } - typename = type->e.numeric.type == HLSL_TYPE_HALF ? "half" : "float"; + typename = hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type)->name; template = templates[dim]; switch (dim) @@ -3734,7 +3875,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_distance(struct hlsl_ctx *ctx, @@ -3751,19 +3892,63 @@ static bool intrinsic_distance(struct hlsl_ctx *ctx, if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, arg2, loc))) return false; - if (!(add = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, arg1, neg, loc))) + if (!(add = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, arg1, neg, loc))) + return false; + + if (!(dot = add_binary_dot_expr(ctx, params->instrs, add, add, loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, dot, loc); +} + +static bool intrinsic_dot(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); +} + +static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type, *vec4_type; + char *body; + + static const char template[] = + "%s dst(%s i0, %s i1)\n" + "{\n" + /* Scalars and vector-4s are 
both valid inputs, so promote scalars + * if necessary. */ + " %s src0 = i0, src1 = i1;\n" + " return %s(1, src0.y * src1.y, src0.z, src1.w);\n" + "}"; + + if (!elementwise_intrinsic_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + if (!(type->class == HLSL_CLASS_SCALAR + || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4))) + { + struct vkd3d_string_buffer *string; + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong dimension for dst(): expected scalar or 4-dimensional vector, but got %s.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + vec4_type = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); + + if (!(body = hlsl_sprintf_alloc(ctx, template, + vec4_type->name, type->name, type->name, + vec4_type->name, + vec4_type->name))) return false; - - if (!(dot = add_binary_dot_expr(ctx, params->instrs, add, add, loc))) + func = hlsl_compile_internal_function(ctx, "dst", body); + vkd3d_free(body); + if (!func) return false; - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, dot, loc); -} - -static bool intrinsic_dot(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -{ - return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_exp(struct hlsl_ctx *ctx, @@ -3809,9 +3994,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, " return dot(i, ng) < 0 ? 
n : -n;\n" "}\n"; - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = params->args[0]->data_type; if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name))) @@ -3821,7 +4006,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, @@ -3926,7 +4111,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_ldexp(struct hlsl_ctx *ctx, @@ -4029,7 +4214,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_log(struct hlsl_ctx *ctx, @@ -4081,6 +4266,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); } +static bool intrinsic_mad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + if (!elementwise_intrinsic_convert_args(ctx, params, loc)) + return false; + + args[0] = params->args[0]; + args[1] = params->args[1]; + args[2] = params->args[2]; + return add_expr(ctx, params->instrs, HLSL_OP3_MAD, args, args[0]->data_type, loc); +} + static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4099,6 +4298,35 @@ static bool 
intrinsic_min(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); } +static bool intrinsic_modf(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s modf(%s x, out %s ip)\n" + "{\n" + " ip = trunc(x);\n" + " return x - ip;\n" + "}"; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "modf", body); + vkd3d_free(body); + if (!func) + return false; + + return !!add_user_call(ctx, func, params, false, loc); +} + static bool intrinsic_mul(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4285,13 +4513,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, static bool intrinsic_refract(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_type *r_type = params->args[0]->data_type; - struct hlsl_type *n_type = params->args[1]->data_type; - struct hlsl_type *i_type = params->args[2]->data_type; - struct hlsl_type *res_type, *idx_type, *scal_type; - struct parse_initializer mut_params; + struct hlsl_type *type, *scalar_type; struct hlsl_ir_function_decl *func; - enum hlsl_base_type base; + struct hlsl_ir_node *index; char *body; static const char template[] = @@ -4303,28 +4527,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, " return t >= 0.0 ? 
i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" "}"; - if (r_type->class == HLSL_CLASS_MATRIX - || n_type->class == HLSL_CLASS_MATRIX - || i_type->class == HLSL_CLASS_MATRIX) + if (params->args[0]->data_type->class == HLSL_CLASS_MATRIX + || params->args[1]->data_type->class == HLSL_CLASS_MATRIX + || params->args[2]->data_type->class == HLSL_CLASS_MATRIX) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); return false; } - VKD3D_ASSERT(params->args_count == 3); - mut_params = *params; - mut_params.args_count = 2; - if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) + /* This is technically not an elementwise intrinsic, but the first two + * arguments are. + * The third argument is a scalar, but can be passed as a vector, + * which should generate an implicit truncation warning. + * Cast down to scalar explicitly, then we can just use + * elementwise_intrinsic_float_convert_args(). + * This may result in casting the scalar back to a vector, + * which we will only use the first component of. */ + + scalar_type = hlsl_get_scalar_type(ctx, params->args[2]->data_type->e.numeric.type); + if (!(index = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc))) return false; + params->args[2] = index; - base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); - base = base == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; - res_type = convert_numeric_type(ctx, res_type, base); - idx_type = convert_numeric_type(ctx, i_type, base); - scal_type = hlsl_get_scalar_type(ctx, base); + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; - if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, - res_type->name, idx_type->name, scal_type->name))) + if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, + type->name, type->name, scalar_type->name))) return false; func = hlsl_compile_internal_function(ctx, "refract", body); @@ -4332,7 +4562,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_round(struct hlsl_ctx *ctx, @@ -4415,6 +4645,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } +static bool intrinsic_sincos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "void sincos(%s f, out %s s, out %s c)\n" + "{\n" + " s = sin(f);\n" + " c = cos(f);\n" + "}"; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "sincos", body); + vkd3d_free(body); + if (!func) + return false; + + return !!add_user_call(ctx, func, params, false, loc); +} + static bool intrinsic_sinh(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4436,9 +4695,9 @@ static bool intrinsic_smoothstep(struct 
hlsl_ctx *ctx, " return (p * p) * (3 - 2 * p);\n" "}"; - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = params->args[0]->data_type; if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) return false; @@ -4447,7 +4706,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_sqrt(struct hlsl_ctx *ctx, @@ -4469,13 +4728,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; + type = params->args[0]->data_type; if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, params->args[1], params->args[0], loc))) return false; - type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } @@ -4523,7 +4781,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, @@ -4661,17 +4919,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) return false; - initialize_var_components(ctx, params->instrs, var, &idx, coords); + initialize_var_components(ctx, params->instrs, var, &idx, coords, false); if (hlsl_version_ge(ctx, 4, 0)) { if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) return false; 
hlsl_block_add_instr(params->instrs, half); - initialize_var_components(ctx, params->instrs, var, &idx, half); + initialize_var_components(ctx, params->instrs, var, &idx, half, false); } else - initialize_var_components(ctx, params->instrs, var, &idx, coords); + initialize_var_components(ctx, params->instrs, var, &idx, coords, false); if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; @@ -4937,6 +5195,7 @@ intrinsic_functions[] = {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, + {"dst", 2, true, intrinsic_dst}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"f16tof32", 1, true, intrinsic_f16tof32}, @@ -4952,8 +5211,10 @@ intrinsic_functions[] = {"log", 1, true, intrinsic_log}, {"log10", 1, true, intrinsic_log10}, {"log2", 1, true, intrinsic_log2}, + {"mad", 3, true, intrinsic_mad}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, + {"modf", 2, true, intrinsic_modf}, {"mul", 2, true, intrinsic_mul}, {"normalize", 1, true, intrinsic_normalize}, {"pow", 2, true, intrinsic_pow}, @@ -4966,6 +5227,7 @@ intrinsic_functions[] = {"saturate", 1, true, intrinsic_saturate}, {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, + {"sincos", 3, true, intrinsic_sincos}, {"sinh", 1, true, intrinsic_sinh}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, @@ -5002,9 +5264,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, struct intrinsic_function *intrinsic; struct hlsl_ir_function_decl *decl; - if ((decl = find_function_call(ctx, name, args, loc))) + for (unsigned int i = 0; i < args->args_count; ++i) + { + if (args->args[i]->data_type->class == HLSL_CLASS_ERROR) + { + args->instrs->value = ctx->error_instr; + return args->instrs; + } + } + + if ((decl = find_function_call(ctx, name, args, false, loc))) { - if (!add_user_call(ctx, decl, args, loc)) + if (!add_user_call(ctx, decl, 
args, false, loc)) goto fail; } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), @@ -5060,18 +5331,94 @@ fail: return NULL; } +static struct hlsl_block *add_shader_compilation(struct hlsl_ctx *ctx, const char *profile_name, + const char *function_name, struct parse_initializer *args, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *compile, *call_to_compile = NULL; + struct hlsl_ir_function_decl *decl; + + if (!ctx->in_state_block && ctx->cur_scope != ctx->globals) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE, + "Shader compilation statements must be in global scope or a state block."); + free_parse_initializer(args); + return NULL; + } + + if (!(decl = find_function_call(ctx, function_name, args, true, loc))) + { + if (rb_get(&ctx->functions, function_name)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "No compatible \"%s\" declaration with %u uniform parameters found.", + function_name, args->args_count); + } + else + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Function \"%s\" is not defined.", function_name); + } + free_parse_initializer(args); + return NULL; + } + + if (!(call_to_compile = add_user_call(ctx, decl, args, true, loc))) + { + free_parse_initializer(args); + return NULL; + } + + if (!(compile = hlsl_new_compile(ctx, HLSL_COMPILE_TYPE_COMPILE, + profile_name, &call_to_compile, 1, args->instrs, loc))) + { + free_parse_initializer(args); + return NULL; + } + + free_parse_initializer(args); + return make_block(ctx, compile); +} + +static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, + struct parse_initializer *args, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *compile; + + switch (compile_type) + { + case HLSL_COMPILE_TYPE_COMPILE: + vkd3d_unreachable(); + + case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: + if (args->args_count != 2 && args->args_count != 6) + { + 
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to ConstructGSWithSO: expected 2 or 6, but got %u.", + args->args_count); + } + break; + } + + if (!(compile = hlsl_new_compile(ctx, compile_type, NULL, args->args, args->args_count, args->instrs, loc))) + { + free_parse_initializer(args); + return NULL; + } + + free_parse_initializer(args); + return make_block(ctx, compile); +} + static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; struct hlsl_ir_var *var; - unsigned int i, idx = 0; if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL; - for (i = 0; i < params->args_count; ++i) - initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); + initialize_var(ctx, var, params, false); if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; @@ -5088,6 +5435,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_type *cond_type = cond->data_type; struct hlsl_type *common_type; + if (cond->data_type->class == HLSL_CLASS_ERROR + || first->data_type->class == HLSL_CLASS_ERROR + || second->data_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; + return true; + } + if (cond_type->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string; @@ -5113,11 +5468,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, } else { - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, - cond_type->dimx, cond_type->dimy); - if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) - return false; - if (common_type->dimx == 1 && common_type->dimy == 1) { common_type = hlsl_get_numeric_type(ctx, cond_type->class, @@ -5139,6 +5489,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, 
hlsl_release_string_buffer(ctx, cond_string); hlsl_release_string_buffer(ctx, value_string); } + + cond_type = hlsl_get_numeric_type(ctx, common_type->class, HLSL_TYPE_BOOL, + common_type->dimx, common_type->dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; } if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) @@ -5854,6 +6209,21 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru const struct hlsl_type *object_type = object->data_type; const struct method_function *method; + if (object_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; + return true; + } + + for (unsigned int i = 0; i < params->args_count; ++i) + { + if (params->args[i]->data_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; + return true; + } + } + if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; @@ -5995,16 +6365,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, hlsl_release_string_buffer(ctx, string); } -static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) -{ - if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, - sizeof(*state_block->entries))) - return false; - - state_block->entries[state_block->count++] = entry; - return true; -} - } %locations @@ -6058,8 +6418,10 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE +%token KW_COMPILESHADER %token KW_COMPUTESHADER %token KW_CONST +%token KW_CONSTRUCTGSWITHSO %token KW_CONTINUE %token KW_DEFAULT %token KW_DEPTHSTENCILSTATE @@ -6067,7 +6429,6 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_DISCARD %token KW_DO %token 
KW_DOMAINSHADER -%token KW_DOUBLE %token KW_ELSE %token KW_EXPORT %token KW_EXTERN @@ -6273,6 +6634,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %type switch_case +%type base_optional %type field_type %type named_struct_spec %type unnamed_struct_spec @@ -6487,11 +6849,28 @@ struct_spec: | unnamed_struct_spec named_struct_spec: - KW_STRUCT any_identifier '{' fields_list '}' + KW_STRUCT any_identifier base_optional '{' fields_list '}' { bool ret; - $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count); + if ($3) + { + char *name; + + if (!(name = hlsl_strdup(ctx, "$super"))) + YYABORT; + if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields))) + YYABORT; + memmove(&$5.fields[1], $5.fields, $5.count * sizeof(*$5.fields)); + ++$5.count; + + memset(&$5.fields[0], 0, sizeof($5.fields[0])); + $5.fields[0].type = $3; + $5.fields[0].loc = @3; + $5.fields[0].name = name; + } + + $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count); if (hlsl_get_var(ctx->cur_scope, $2)) { @@ -6518,6 +6897,23 @@ any_identifier: | TYPE_IDENTIFIER | NEW_IDENTIFIER +/* TODO: Multiple inheritance support for interfaces. 
*/ +base_optional: + %empty + { + $$ = NULL; + } + | ':' TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); + if ($$->class != HLSL_CLASS_STRUCT) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2); + YYABORT; + } + vkd3d_free($2); + } + fields_list: %empty { @@ -6827,6 +7223,8 @@ func_prototype: func_prototype_no_attrs | attribute_list func_prototype_no_attrs { + check_attribute_list_for_duplicates(ctx, &$1); + if ($2.first) { $2.decl->attr_count = $1.count; @@ -7614,11 +8012,21 @@ stateblock_lhs_identifier: if (!($$ = hlsl_strdup(ctx, "pixelshader"))) YYABORT; } + | KW_TEXTURE + { + if (!($$ = hlsl_strdup(ctx, "texture"))) + YYABORT; + } | KW_VERTEXSHADER { if (!($$ = hlsl_strdup(ctx, "vertexshader"))) YYABORT; } + | KW_GEOMETRYSHADER + { + if (!($$ = hlsl_strdup(ctx, "geometryshader"))) + YYABORT; + } state_block_index_opt: %empty @@ -7666,7 +8074,7 @@ state_block: vkd3d_free($5.args); $$ = $1; - state_block_add_entry($$, entry); + hlsl_state_block_add_entry($$, entry); } | state_block any_identifier '(' func_arguments ')' ';' { @@ -7694,7 +8102,7 @@ state_block: hlsl_validate_state_block_entry(ctx, entry, &@4); $$ = $1; - state_block_add_entry($$, entry); + hlsl_state_block_add_entry($$, entry); } state_block_list: @@ -7931,6 +8339,7 @@ complex_initializer: $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; + $$.loc = @$; } | '{' complex_initializer_list '}' { @@ -7962,6 +8371,7 @@ complex_initializer_list: $$.args[$$.args_count++] = $3.args[i]; hlsl_block_add_block($$.instrs, $3.instrs); free_parse_initializer(&$3); + $$.loc = @$; } initializer_expr: @@ -7979,6 +8389,7 @@ initializer_expr_list: $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; + $$.loc = @$; } | initializer_expr_list ',' initializer_expr { @@ -8092,8 +8503,7 @@ selection_statement: struct hlsl_ir_node *instr; unsigned int i; - if (attribute_list_has_duplicates(attributes)) - 
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); + check_attribute_list_for_duplicates(ctx, attributes); for (i = 0; i < attributes->count; ++i) { @@ -8298,6 +8708,7 @@ func_arguments: if (!($$.instrs = make_empty_block(ctx))) YYABORT; $$.braces = false; + $$.loc = @$; } | initializer_expr_list @@ -8391,6 +8802,34 @@ primary_expr: { $$ = $2; } + + | KW_COMPILE any_identifier var_identifier '(' func_arguments ')' + { + if (!($$ = add_shader_compilation(ctx, $2, $3, &$5, &@1))) + { + vkd3d_free($2); + vkd3d_free($3); + YYABORT; + } + vkd3d_free($2); + vkd3d_free($3); + } + | KW_COMPILESHADER '(' any_identifier ',' var_identifier '(' func_arguments ')' ')' + { + if (!($$ = add_shader_compilation(ctx, $3, $5, &$7, &@1))) + { + vkd3d_free($3); + vkd3d_free($5); + YYABORT; + } + vkd3d_free($3); + vkd3d_free($5); + } + | KW_CONSTRUCTGSWITHSO '(' func_arguments ')' + { + if (!($$ = add_compile_variant(ctx, HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, &$3, &@1))) + YYABORT; + } | var_identifier '(' func_arguments ')' { if (!($$ = add_call(ctx, $1, &$3, &@1))) @@ -8400,6 +8839,25 @@ primary_expr: } vkd3d_free($1); } + | KW_SAMPLER_STATE '{' state_block_start state_block '}' + { + struct hlsl_ir_node *sampler_state; + ctx->in_state_block = 0; + + if (!ctx->in_state_block && ctx->cur_scope != ctx->globals) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE, + "sampler_state must be in global scope or a state block."); + + if (!(sampler_state = hlsl_new_sampler_state(ctx, $4, &@1))) + { + hlsl_free_state_block($4); + YYABORT; + } + hlsl_free_state_block($4); + + if (!($$ = make_block(ctx, sampler_state))) + YYABORT; + } | NEW_IDENTIFIER { if (ctx->in_state_block) @@ -8416,7 +8874,11 @@ primary_expr: else { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Identifier \"%s\" is not declared.", $1); - YYABORT; + vkd3d_free($1); + + if (!($$ = make_empty_block(ctx))) + YYABORT; + $$->value = ctx->error_instr; } } @@ 
-8446,25 +8908,12 @@ postfix_expr: if (node->data_type->class == HLSL_CLASS_STRUCT) { - struct hlsl_type *type = node->data_type; - const struct hlsl_struct_field *field; - unsigned int field_idx = 0; - - if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); - vkd3d_free($3); - YYABORT; - } - - field_idx = field - type->e.record.fields; - if (!add_record_access(ctx, $1, node, field_idx, &@2)) + if (!add_record_access_recurse(ctx, $1, $3, &@2)) { vkd3d_free($3); YYABORT; } vkd3d_free($3); - $$ = $1; } else if (hlsl_is_numeric_type(node->data_type)) { @@ -8478,14 +8927,14 @@ postfix_expr: } hlsl_block_add_instr($1, swizzle); vkd3d_free($3); - $$ = $1; } - else + else if (node->data_type->class != HLSL_CLASS_ERROR) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); vkd3d_free($3); YYABORT; } + $$ = $1; } | postfix_expr '[' expr ']' { @@ -8523,14 +8972,6 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } - if ($2->dimx * $2->dimy != initializer_size(&$4)) - { - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in constructor, but got %u.", - $2->dimx * $2->dimy, initializer_size(&$4)); - free_parse_initializer(&$4); - YYABORT; - } if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { @@ -8597,10 +9038,6 @@ unary_expr: /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ | '(' var_modifiers type arrays ')' unary_expr { - struct hlsl_type *src_type = node_from_block($6)->data_type; - struct hlsl_type *dst_type; - unsigned int i; - if ($2) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, @@ -8608,36 +9045,13 @@ unary_expr: YYABORT; } - dst_type = $3; - for (i = 0; i < $4.count; ++i) - { - if ($4.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays not allowed in casts."); - } - dst_type = hlsl_new_array_type(ctx, dst_type, $4.sizes[i]); - } - - if (!explicit_compatible_data_types(ctx, src_type, dst_type)) - { - struct vkd3d_string_buffer *src_string, *dst_string; - - src_string = hlsl_type_to_string(ctx, src_type); - dst_string = hlsl_type_to_string(ctx, dst_type); - if (src_string && dst_string) - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", - src_string->buffer, dst_string->buffer); - hlsl_release_string_buffer(ctx, src_string); - hlsl_release_string_buffer(ctx, dst_string); - YYABORT; - } - - if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) + if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) { destroy_block($6); + vkd3d_free($4.sizes); YYABORT; } + vkd3d_free($4.sizes); $$ = $6; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 92b5c71c43f..88bec8610cb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -276,8 +276,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls == base_type_get_semantic_equivalent(type2->e.numeric.type); } -static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, +static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_var *var, 
struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t index, bool output, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; @@ -287,7 +287,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) return NULL; - LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { if (!ascii_strcasecmp(ext_var->name, new_name)) { @@ -339,12 +339,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir ext_var->is_input_semantic = 1; ext_var->is_param = var->is_param; list_add_before(&var->scope_entry, &ext_var->scope_entry); - list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); + list_add_tail(&func->extern_vars, &ext_var->extern_entry); return ext_var; } -static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, +static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; @@ -375,7 +375,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s struct hlsl_ir_var *input; struct hlsl_ir_load *load; - if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, + if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, semantic_index + i, false, loc))) return; @@ -408,8 +408,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s } } -static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, - uint32_t modifiers, struct 
hlsl_semantic *semantic, uint32_t semantic_index) +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; @@ -466,30 +466,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * return; list_add_after(&c->entry, &element_load->node.entry); - prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); + prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); } } else { - prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); + prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); } } /* Split inputs into two variables representing the semantic and temp registers, * and copy the former to the latter, so that writes to input variables work. */ -static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) +static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) { struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. 
*/ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_head(&block->instrs, &load->node.entry); + list_add_head(&func->body.instrs, &load->node.entry); - prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); } -static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, +static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; @@ -517,18 +517,19 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s struct hlsl_ir_var *output; struct hlsl_ir_load *load; - if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) + if (!(output = add_semantic_var(ctx, func, var, vector_type, + modifiers, semantic, semantic_index + i, true, loc))) return; if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - hlsl_block_add_instr(block, c); + hlsl_block_add_instr(&func->body, c); if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; - hlsl_block_add_instr(block, &load->node); + hlsl_block_add_instr(&func->body, &load->node); } else { @@ -536,17 +537,17 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) return; - hlsl_block_add_instr(block, &load->node); + hlsl_block_add_instr(&func->body, &load->node); } if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - hlsl_block_add_instr(block, store); + hlsl_block_add_instr(&func->body, store); } } -static void append_output_copy_recurse(struct hlsl_ctx *ctx, 
struct hlsl_block *block, struct hlsl_ir_load *rhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_type *type = rhs->node.data_type; @@ -580,34 +581,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - hlsl_block_add_instr(block, c); + hlsl_block_add_instr(&func->body, c); if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) return; - hlsl_block_add_instr(block, &element_load->node); + hlsl_block_add_instr(&func->body, &element_load->node); - append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); } } else { - append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); + append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); } } /* Split outputs into two variables representing the temp and semantic * registers, and copy the former to the latter, so that reads from output * variables work. */ -static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) +static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) { struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. 
*/ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - hlsl_block_add_instr(block, &load->node); + hlsl_block_add_instr(&func->body, &load->node); - append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); } bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -1655,11 +1656,16 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_MATRIX: case HLSL_CLASS_ARRAY: case HLSL_CLASS_STRUCT: - case HLSL_CLASS_CONSTANT_BUFFER: - /* FIXME: Actually we shouldn't even get here, but we don't split - * matrices yet. */ + /* We can't handle complex types here. + * They should have been already split anyway by earlier passes, + * but they may not have been deleted yet. We can't rely on DCE to + * solve that problem for us, since we may be called on a partial + * block, but DCE deletes dead stores, so it needs to be able to + * see the whole program. 
*/ + case HLSL_CLASS_ERROR: return false; + case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: @@ -4050,12 +4056,14 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) switch (instr->type) { case HLSL_IR_CONSTANT: + case HLSL_IR_COMPILE: case HLSL_IR_EXPR: case HLSL_IR_INDEX: case HLSL_IR_LOAD: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_STRING_CONSTANT: case HLSL_IR_SWIZZLE: + case HLSL_IR_SAMPLER_STATE: if (list_empty(&instr->uses)) { list_remove(&instr->entry); @@ -4106,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) } } -static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { if (!deref->rel_offset.node) @@ -4119,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, return true; } +void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + var->indexable = false; + } + + transform_derefs(ctx, mark_indexable_var, &entry_func->body); +} + static char get_regset_name(enum hlsl_regset regset) { switch (regset) @@ -4135,11 +4157,11 @@ static char get_regset_name(enum hlsl_regset regset) vkd3d_unreachable(); } -static void allocate_register_reservations(struct hlsl_ctx *ctx) +static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars) { struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry) { const struct hlsl_reg_reservation *reservation = &var->reg_reservation; unsigned int r; @@ -4337,11 
+4359,23 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CONSTANT: case HLSL_IR_STRING_CONSTANT: break; + case HLSL_IR_COMPILE: + case HLSL_IR_SAMPLER_STATE: + /* These types are skipped as they are only relevant to effects. */ + break; } } } -static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static void init_var_liveness(struct hlsl_ir_var *var) +{ + if (var->is_uniform || var->is_input_semantic) + var->first_write = 1; + else if (var->is_output_semantic) + var->last_read = UINT_MAX; +} + +void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct hlsl_scope *scope; struct hlsl_ir_var *var; @@ -4355,16 +4389,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl } LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform || var->is_input_semantic) - var->first_write = 1; - else if (var->is_output_semantic) - var->last_read = UINT_MAX; - } + init_var_liveness(var); + + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) + init_var_liveness(var); compute_liveness_recurse(&entry_func->body, 0, 0); } +static void mark_vars_usage(struct hlsl_ctx *ctx) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->last_read) + var->is_read = true; + } + } +} + struct register_allocator { struct allocation @@ -4816,7 +4863,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } } -static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) +static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f, + const struct vkd3d_shader_location *loc) { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct 
hlsl_constant_register *reg; @@ -4838,6 +4886,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, memset(reg, 0, sizeof(*reg)); reg->index = component_index / 4; reg->value.f[component_index % 4] = f; + reg->loc = *loc; } static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, @@ -4898,7 +4947,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, vkd3d_unreachable(); } - record_constant(ctx, constant->reg.id * 4 + x, f); + record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); } break; @@ -4991,17 +5040,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f, &instr->loc); ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); 
+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f, &instr->loc); return; } @@ -5081,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * index to all (simultaneously live) variables or intermediate values. Agnostic * as to how many registers are actually available for the current backend, and * does not handle constants. */ -static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + /* Reset variable temp register allocations. */ + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) + memset(var->regs, 0, sizeof(var->regs)); + } + } /* ps_1_* outputs are special and go in temp register 0. 
*/ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) @@ -5092,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio for (i = 0; i < entry_func->parameters.count; ++i) { - const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; - + var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); @@ -5103,11 +5163,13 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio } allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); - ctx->temp_count = allocator.reg_count; vkd3d_free(allocator.allocations); + + return allocator.reg_count; } -static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) +static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + unsigned int *counter, bool output, bool is_patch_constant_func) { static const char *const shader_names[] = { @@ -5120,27 +5182,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var }; enum vkd3d_shader_register_type type; + struct vkd3d_shader_version version; uint32_t reg; bool builtin; VKD3D_ASSERT(var->semantic.name); - if (ctx->profile->major_version < 4) + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (version.major < 4) { - struct vkd3d_shader_version version; - D3DDECLUSAGE usage; + enum vkd3d_decl_usage usage; uint32_t usage_idx; /* ps_1_* outputs are special and go in temp register 0. 
*/ - if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL) return; - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - builtin = hlsl_sm1_register_from_semantic(&version, + builtin = sm1_register_from_semantic_name(&version, var->semantic.name, var->semantic.index, output, &type, ®); - if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -5152,22 +5215,24 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } else { - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; bool has_idx; - if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage)) + if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, + ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); return; } - if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx))) + + if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) reg = has_idx ? var->semantic.index : 0; } if (builtin) { - TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], + TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type], output ? 
"output" : "input", var->semantic.name, var->semantic.index, type, reg); } else @@ -5181,21 +5246,23 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } } -static void allocate_semantic_registers(struct hlsl_ctx *ctx) +static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + bool is_patch_constant_func = entry_func == ctx->patch_constant_func; unsigned int input_counter = 0, output_counter = 0; struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_input_semantic) - allocate_semantic_register(ctx, var, &input_counter, false); + allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); if (var->is_output_semantic) - allocate_semantic_register(ctx, var, &output_counter, true); + allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); } } -static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) +static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, + uint32_t space, uint32_t index, bool allocated_only) { const struct hlsl_buffer *buffer; @@ -5203,7 +5270,12 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 { if (buffer->reservation.reg_type == 'b' && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) + { + if (allocated_only && !buffer->reg.allocated) + continue; + return buffer; + } } return NULL; } @@ -5260,7 +5332,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); - if (var->last_read) + if (var->is_read) buffer->used_size = max(buffer->used_size, var->buffer_offset + 
var_reg_size); } @@ -5386,8 +5458,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (reservation->reg_type == 'b') { - const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, - reservation->reg_space, reservation->reg_index); + const struct hlsl_buffer *allocated_buffer = get_reserved_buffer(ctx, + reservation->reg_space, reservation->reg_index, true); unsigned int max_index = get_max_cbuffer_reg_index(ctx); if (buffer->reservation.reg_index > max_index) @@ -5395,14 +5467,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) "Buffer reservation cb%u exceeds target's maximum (cb%u).", buffer->reservation.reg_index, max_index); - if (reserved_buffer && reserved_buffer != buffer) + if (allocated_buffer && allocated_buffer != buffer) { hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "Multiple buffers bound to space %u, index %u.", reservation->reg_space, reservation->reg_index); - hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, + hlsl_note(ctx, &allocated_buffer->loc, VKD3D_SHADER_LOG_ERROR, "Buffer %s is already bound to space %u, index %u.", - reserved_buffer->name, reservation->reg_space, reservation->reg_index); + allocated_buffer->name, reservation->reg_space, reservation->reg_index); } buffer->reg.space = reservation->reg_space; @@ -5419,12 +5491,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) else if (!reservation->reg_type) { unsigned int max_index = get_max_cbuffer_reg_index(ctx); - while (get_reserved_buffer(ctx, 0, index)) + while (get_reserved_buffer(ctx, 0, index, false)) ++index; if (index > max_index) hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Too many buffers allocated, target's maximum is %u.", max_index); + "Too many buffers reserved, target's maximum is %u.", max_index); buffer->reg.space = 0; buffer->reg.index = index; @@ -5491,15 +5563,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum return NULL; } 
-static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset) { char regset_name = get_regset_name(regset); uint32_t min_index = 0, id = 0; struct hlsl_ir_var *var; - if (regset == HLSL_REGSET_UAVS) + if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) { - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") || !ascii_strcasecmp(var->semantic.name, "sv_target"))) @@ -5786,6 +5858,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere return ret; } +static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i) +{ + const struct hlsl_ir_node *instr = attr->args[i].node; + const struct hlsl_type *type = instr->data_type; + + if (type->class != HLSL_CLASS_STRING) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for the argument %u of [%s]: expected string, but got %s.", + i, attr->name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } + + return hlsl_ir_string_constant(instr)->string; +} + static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) { unsigned int i; @@ -5834,6 +5926,263 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } } +static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *value; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [domain] attribute, but got %u.", 
attr->args_count); + return; + } + + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; + + if (!strcmp(value, "isoline")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_LINE; + else if (!strcmp(value, "tri")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + else if (!strcmp(value, "quad")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + else + hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN, + "Invalid tessellator domain \"%s\": expected \"isoline\", \"tri\", or \"quad\".", + value); +} + +static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const struct hlsl_ir_node *instr; + const struct hlsl_type *type; + const struct hlsl_ir_constant *constant; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [outputcontrolpoints] attribute, but got %u.", attr->args_count); + return; + } + + instr = attr->args[0].node; + type = instr->data_type; + + if (type->class != HLSL_CLASS_SCALAR + || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + return; + } + + if (instr->type != HLSL_IR_CONSTANT) + { + hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer."); + return; + } + constant = hlsl_ir_constant(instr); + + if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0) + || constant->value.u[0].u > 32) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, + "Output control point count must be between 0 and 32."); + + ctx->output_control_point_count = 
constant->value.u[0].u; +} + +static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *value; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [outputtopology] attribute, but got %u.", attr->args_count); + return; + } + + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; + + if (!strcmp(value, "point")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT; + else if (!strcmp(value, "line")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE; + else if (!strcmp(value, "triangle_cw")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW; + else if (!strcmp(value, "triangle_ccw")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else + hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Invalid tessellator output topology \"%s\": " + "expected \"point\", \"line\", \"triangle_cw\", or \"triangle_ccw\".", value); +} + +static void parse_partitioning_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *value; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [partitioning] attribute, but got %u.", attr->args_count); + return; + } + + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; + + if (!strcmp(value, "integer")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER; + else if (!strcmp(value, "pow2")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2; + else if (!strcmp(value, "fractional_even")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; + else if (!strcmp(value, "fractional_odd")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; + else + hlsl_error(ctx, 
&attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING, + "Invalid tessellator partitioning \"%s\": " + "expected \"integer\", \"pow2\", \"fractional_even\", or \"fractional_odd\".", value); +} + +static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *name; + struct hlsl_ir_function *func; + struct hlsl_ir_function_decl *decl; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [patchconstantfunc] attribute, but got %u.", attr->args_count); + return; + } + + if (!(name = get_string_argument_value(ctx, attr, 0))) + return; + + ctx->patch_constant_func = NULL; + if ((func = hlsl_get_function(ctx, name))) + { + /* Pick the last overload with a body. */ + LIST_FOR_EACH_ENTRY_REV(decl, &func->overloads, struct hlsl_ir_function_decl, entry) + { + if (decl->has_body) + { + ctx->patch_constant_func = decl; + break; + } + } + } + + if (!ctx->patch_constant_func) + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Patch constant function \"%s\" is not defined.", name); +} + +static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_profile_info *profile = ctx->profile; + unsigned int i; + + for (i = 0; i < entry_func->attr_count; ++i) + { + const struct hlsl_attribute *attr = entry_func->attrs[i]; + + if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE) + parse_numthreads_attribute(ctx, attr); + else if (!strcmp(attr->name, "domain") + && (profile->type == VKD3D_SHADER_TYPE_HULL || profile->type == VKD3D_SHADER_TYPE_DOMAIN)) + parse_domain_attribute(ctx, attr); + else if (!strcmp(attr->name, "outputcontrolpoints") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_outputcontrolpoints_attribute(ctx, attr); + else if (!strcmp(attr->name, "outputtopology") && profile->type == 
VKD3D_SHADER_TYPE_HULL) + parse_outputtopology_attribute(ctx, attr); + else if (!strcmp(attr->name, "partitioning") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_partitioning_attribute(ctx, attr); + else if (!strcmp(attr->name, "patchconstantfunc") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_patchconstantfunc_attribute(ctx, attr); + else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL) + entry_func->early_depth_test = true; + else + hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, + "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); + } +} + +static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func) +{ + if (ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); + } + + if (ctx->output_control_point_count == UINT_MAX) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [outputcontrolpoints] attribute.", entry_func->func->name); + } + + if (!ctx->output_primitive) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [outputtopology] attribute.", entry_func->func->name); + } + + if (!ctx->partitioning) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [partitioning] attribute.", entry_func->func->name); + } + + if (!ctx->patch_constant_func) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [patchconstantfunc] attribute.", entry_func->func->name); + } + else if (ctx->patch_constant_func == entry_func) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL, + 
"Patch constant function cannot be the entry point function."); + /* Native returns E_NOTIMPL instead of E_FAIL here. */ + ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; + return; + } + + switch (ctx->domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW + || ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Triangle output topologies are not available for isoline domains."); + break; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Line output topologies are not available for triangle domains."); + break; + + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Line output topologies are not available for quad domains."); + break; + + default: + break; + } +} + static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; @@ -5890,12 +6239,16 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod } } +void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) +{ + lower_ir(ctx, lower_index_loads, body); +} + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { bool progress; lower_ir(ctx, lower_matrix_swizzles, body); - lower_ir(ctx, lower_index_loads, body); lower_ir(ctx, lower_broadcasts, body); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); @@ -5928,22 +6281,104 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) } while (progress); } -static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - struct vsir_program 
*program, bool output, struct hlsl_ir_var *var) +static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, + struct shader_signature *signature, bool output, struct hlsl_ir_var *var) { enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + enum vkd3d_shader_component_type component_type; + unsigned int register_index, mask, use_mask; + const char *name = var->semantic.name; enum vkd3d_shader_register_type type; - struct shader_signature *signature; struct signature_element *element; - unsigned int register_index, mask; - if ((!output && !var->last_read) || (output && !var->first_write)) - return; + if (hlsl_version_ge(ctx, 4, 0)) + { + struct vkd3d_string_buffer *string; + bool has_idx, ret; - if (output) - signature = &program->output_signature; + ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, + ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, + output, signature == &program->patch_constant_signature); + VKD3D_ASSERT(ret); + if (sysval == ~0u) + return; + + if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) + { + register_index = has_idx ? var->semantic.index : ~0u; + } + else + { + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + register_index = var->regs[HLSL_REGSET_NUMERIC].id; + } + + /* NOTE: remember to change this to the actually allocated mask once + * we start optimizing interstage signatures. */ + mask = (1u << var->data_type->dimx) - 1; + use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ + + switch (var->data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + break; + + case HLSL_TYPE_INT: + component_type = VKD3D_SHADER_COMPONENT_INT; + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + component_type = VKD3D_SHADER_COMPONENT_UINT; + break; + + default: + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); + component_type = VKD3D_SHADER_COMPONENT_VOID; + break; + } + + if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) + name = "SV_Target"; + else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) + name ="SV_Depth"; + else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) + name = "SV_Position"; + } else - signature = &program->input_signature; + { + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + + if (!sm1_register_from_semantic_name(&program->shader_version, + var->semantic.name, var->semantic.index, output, &type, ®ister_index)) + { + enum vkd3d_decl_usage usage; + unsigned int usage_idx; + bool ret; + + register_index = var->regs[HLSL_REGSET_NUMERIC].id; + + ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + /* With the exception of vertex POSITION output, none of these are + * system values. Pixel POSITION input is not equivalent to + * SV_Position; the closer equivalent is VPOS, which is not declared + * as a semantic. */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + } + + mask = (1 << var->data_type->dimx) - 1; + use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + } if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, signature->element_count + 1, sizeof(*signature->elements))) @@ -5952,73 +6387,986 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, return; } element = &signature->elements[signature->element_count++]; + memset(element, 0, sizeof(*element)); - if (!hlsl_sm1_register_from_semantic(&program->shader_version, - var->semantic.name, var->semantic.index, output, &type, ®ister_index)) + if (!(element->semantic_name = vkd3d_strdup(name))) { - unsigned int usage_idx; - D3DDECLUSAGE usage; - bool ret; - - register_index = var->regs[HLSL_REGSET_NUMERIC].id; - - ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); - VKD3D_ASSERT(ret); - /* With the exception of vertex POSITION output, none of these are - * system values. Pixel POSITION input is not equivalent to - * SV_Position; the closer equivalent is VPOS, which is not declared - * as a semantic. 
*/ - if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX - && output && usage == D3DDECLUSAGE_POSITION) - sysval = VKD3D_SHADER_SV_POSITION; - } - mask = (1 << var->data_type->dimx) - 1; + --signature->element_count; + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + element->semantic_index = var->semantic.index; + element->sysval_semantic = sysval; + element->component_type = component_type; + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; + element->mask = mask; + element->used_mask = use_mask; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; +} + +static void generate_vsir_signature(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_function_decl *func) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (func == ctx->patch_constant_func) + { + generate_vsir_signature_entry(ctx, program, + &program->patch_constant_signature, var->is_output_semantic, var); + } + else + { + if (var->is_input_semantic) + generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); + if (var->is_output_semantic) + generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); + } + } +} + +static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) +{ + uint32_t swizzle; + + swizzle = hlsl_swizzle_from_writemask(src_writemask); + swizzle = hlsl_map_swizzle(swizzle, dst_writemask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + return swizzle; +} + +static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_block *block) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct 
vkd3d_shader_instruction *ins; + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) + { + const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; + + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VKD3DSIH_DEF, 1, 1)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + ++instructions->count; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = constant_reg->index; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + src_param->reg.type = VKD3DSPR_IMMCONST; + src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + src_param->reg.non_uniform = false; + src_param->reg.data_type = VKD3D_DATA_FLOAT; + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + for (x = 0; x < 4; ++x) + src_param->reg.u.immconst_f32[x] = constant_reg->value.f[x]; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } +} + +static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_block *block) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + enum vkd3d_shader_resource_type resource_type; + struct vkd3d_shader_register_range *range; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_semantic *semantic; + struct vkd3d_shader_instruction *ins; + enum hlsl_sampler_dim sampler_dim; + struct hlsl_ir_var *var; + unsigned int i, count; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; 
+ + count = var->bind_count[HLSL_REGSET_SAMPLERS]; + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_2D: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + break; + + case HLSL_SAMPLER_DIM_CUBE: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; + break; + + case HLSL_SAMPLER_DIM_3D: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_3D; + break; + + case HLSL_SAMPLER_DIM_GENERIC: + /* These can appear in sm4-style combined sample instructions. */ + hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); + continue; + + default: + vkd3d_unreachable(); + break; + } + + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, &var->loc, VKD3DSIH_DCL, 0, 0)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + ++instructions->count; + + semantic = &ins->declaration.semantic; + semantic->resource_type = resource_type; + + dst_param = &semantic->resource.reg; + vsir_register_init(&dst_param->reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); + dst_param->reg.dimension = VSIR_DIMENSION_NONE; + dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i; + dst_param->write_mask = 0; + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = dst_param->reg.idx[0].offset; + } + } + } +} + +static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( + struct hlsl_ctx *ctx, struct vsir_program *program, + const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode, + unsigned int dst_count, unsigned int src_count) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction 
*ins; + + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, loc, opcode, dst_count, src_count)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ++instructions->count; + return ins; +} + +static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) +{ + struct hlsl_ir_node *instr = &constant->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(constant->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = constant->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; +} + +/* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ +static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, + bool map_src_swizzles) +{ + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i, src_count = 0; + + VKD3D_ASSERT(instr->reg.allocated); + + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + { + if (expr->operands[i].node) + src_count = i + 1; + } + VKD3D_ASSERT(!src_mod || src_count == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + dst_param->modifiers = dst_mod; + + for (i = 0; i < src_count; ++i) + { + struct hlsl_ir_node *operand = expr->operands[i].node; + + src_param = &ins->src[i]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, + map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = src_mod; + } +} + +/* Translate ops that have 1 src and need one instruction for each component in + * the d3dbc backend. 
*/ +static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode) +{ + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t src_swizzle; + unsigned int i, c; + + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(operand); + + src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); + for (i = 0; i < 4; ++i) + { + if (instr->reg.writemask & (1u << i)) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = 1u << i; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + c = vsir_swizzle_get_component(src_swizzle, i); + src_param->swizzle = vsir_swizzle_from_writemask(1u << c); + } + } +} + +static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_expr *expr) +{ + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int src_count = 0; + + VKD3D_ASSERT(instr->reg.allocated); + src_count = (ctx->profile->major_version < 3) ? 
3 : 1; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + if (ctx->profile->major_version < 3) + { + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + + src_param = &ins->src[2]; /* Third of the 3 sources: the second sincos constant; src[1] holds the first. */ + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } +} + +static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr) +{ + const struct hlsl_type *src_type, *dst_type; + const struct hlsl_ir_node *arg1, *instr; + + arg1 = expr->operands[0].node; + src_type = arg1->data_type; + instr = &expr->node; + dst_type = instr->data_type; + + /* Narrowing casts were already lowered.
*/ + VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_DOUBLE: + if (ctx->double_as_float_alias) + { + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + } + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "The 'double' type is not supported for the %s profile.", ctx->profile->name); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + switch(src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not + * reach this case unless we are missing something. 
*/ + hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_DOUBLE: + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + if (ctx->double_as_float_alias) + { + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + } + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "The 'double' type is not supported for the %s profile.", ctx->profile->name); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 cast to double."); + break; + } + break; + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. */ + default: + hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", + debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); + break; + } + + return false; +} + +static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_expr *expr) +{ + struct hlsl_ir_node *instr = &expr->node; + + if (expr->op != HLSL_OP1_REINTERPRET && expr->op != HLSL_OP1_CAST + && instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. 
*/ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return false; + } + + switch (expr->op) + { + case HLSL_OP1_ABS: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); + break; + + case HLSL_OP1_CAST: + return sm1_generate_vsir_instr_expr_cast(ctx, program, expr); + + case HLSL_OP1_COS_REDUCED: + VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0); + sm1_generate_vsir_instr_expr_sincos(ctx, program, expr); + break; + + case HLSL_OP1_DSX: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); + break; + + case HLSL_OP1_DSY: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); + break; + + case HLSL_OP1_EXP2: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_EXP); + break; + + case HLSL_OP1_LOG2: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_LOG); + break; + + case HLSL_OP1_NEG: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); + break; + + case HLSL_OP1_RCP: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP); + break; + + case HLSL_OP1_REINTERPRET: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + break; + + case HLSL_OP1_RSQ: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RSQ); + break; + + case HLSL_OP1_SAT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); + break; + + case HLSL_OP1_SIN_REDUCED: + VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_1); + sm1_generate_vsir_instr_expr_sincos(ctx, program, expr); + break; + + case HLSL_OP2_ADD: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + break; + + case HLSL_OP2_DOT: + switch 
(expr->operands[0].node->data_type->dimx) + { + case 3: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + break; + + case 4: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + break; + + default: + vkd3d_unreachable(); + return false; + } + break; + + case HLSL_OP2_MAX: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_MIN: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_MUL: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); + break; + + case HLSL_OP1_FRACT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_AND: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_OR: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_SLT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); + break; + + case HLSL_OP3_CMP: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); + break; + + case HLSL_OP3_DP2ADD: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); + break; + + case HLSL_OP3_MAD: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + return false; + } + + return true; +} + +static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, + struct vkd3d_shader_dst_param *dst_param, struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc, unsigned int writemask) +{ + enum 
vkd3d_shader_register_type type = VKD3DSPR_TEMP; + struct vkd3d_shader_version version; + uint32_t register_index; + struct hlsl_reg reg; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = hlsl_combine_writemasks(reg.writemask, writemask); + + if (deref->var->is_output_semantic) + { + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL && version.major == 1) + { + type = VKD3DSPR_TEMP; + register_index = 0; + } + else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, true, &type, &register_index)) + { + VKD3D_ASSERT(reg.allocated); + type = VKD3DSPR_OUTPUT; + register_index = reg.id; + } + else + writemask = (1u << deref->var->data_type->dimx) - 1; + } + else + VKD3D_ASSERT(reg.allocated); + + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); + dst_param->write_mask = writemask; + dst_param->reg.idx[0].offset = register_index; + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); +} + +static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, + unsigned int dst_writemask, const struct vkd3d_shader_location *loc) +{ + enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; + struct vkd3d_shader_version version; + uint32_t register_index; + unsigned int writemask; + struct hlsl_reg reg; - memset(element, 0, sizeof(*element)); - if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) + if (hlsl_type_is_resource(deref->var->data_type)) { - --signature->element_count; - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + unsigned int sampler_offset; + + type = VKD3DSPR_COMBINED_SAMPLER; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, deref); + register_index =
deref->var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; + writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (deref->var->is_uniform) + { + type = VKD3DSPR_CONST; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = reg.writemask; + VKD3D_ASSERT(reg.allocated); + } + else if (deref->var->is_input_semantic) + { + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, false, &type, &register_index)) + { + writemask = (1 << deref->var->data_type->dimx) - 1; + } + else + { + type = VKD3DSPR_INPUT; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = reg.writemask; + VKD3D_ASSERT(reg.allocated); + } + } + else + { + type = VKD3DSPR_TEMP; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = reg.writemask; + } + + vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = register_index; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask); + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); +} + +static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_load *load) +{ + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src,
dst_param->write_mask, + &ins->location); +} + +static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_resource_load *load) +{ + struct hlsl_ir_node *coords = load->coords.node; + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + unsigned int src_count = 2; + uint32_t flags = 0; + + VKD3D_ASSERT(instr->reg.allocated); + + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + opcode = VKD3DSIH_TEX; + break; + + case HLSL_RESOURCE_SAMPLE_PROJ: + opcode = VKD3DSIH_TEX; + flags |= VKD3DSI_TEXLD_PROJECT; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + opcode = VKD3DSIH_TEX; + flags |= VKD3DSI_TEXLD_BIAS; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + opcode = VKD3DSIH_TEXLDD; + src_count += 2; + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); + return; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return; + ins->flags = flags; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = coords->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, + VKD3DSP_WRITEMASK_ALL, &ins->location); + + if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + src_param = &ins->src[2]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, 
VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ddx->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + src_param = &ins->src[3]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ddy->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); } - element->semantic_index = var->semantic.index; - element->sysval_semantic = sysval; - element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; - element->register_index = register_index; - element->target_location = register_index; - element->register_count = 1; - element->mask = mask; - element->used_mask = mask; - if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - element->interpolation_mode = VKD3DSIM_LINEAR; } -static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) +static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_swizzle *swizzle_instr) { - struct hlsl_ir_var *var; + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); + swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); + swizzle = hlsl_map_swizzle(swizzle, 
ins->dst[0].write_mask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = val->reg.id; + src_param->swizzle = swizzle; +} + +static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_store *store) +{ + struct hlsl_ir_node *rhs = store->rhs.node; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_src_param *src_param; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = rhs->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask); +} + +static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_jump *jump) +{ + struct hlsl_ir_node *condition = jump->condition.node; + struct hlsl_ir_node *instr = &jump->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) { - if (var->is_input_semantic) - sm1_generate_vsir_signature_entry(ctx, program, false, var); - if (var->is_output_semantic) - sm1_generate_vsir_signature_entry(ctx, program, true, var); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = condition->reg.id; + dst_param->write_mask = condition->reg.writemask; + } + else + { + hlsl_fixme(ctx, &instr->loc, "Jump type %s.", 
hlsl_jump_type_to_string(jump->type)); + } +} + +static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); + +static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) +{ + struct hlsl_ir_node *condition = iff->condition.node; + struct vkd3d_shader_src_param *src_param; + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; + + if (hlsl_version_lt(ctx, 2, 1)) + { + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + return; + } + VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) + return; + ins->flags = VKD3D_SHADER_REL_OP_NE; + + swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = condition->reg.id; + src_param->swizzle = swizzle; + src_param->modifiers = 0; + + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = condition->reg.id; + src_param->swizzle = swizzle; + src_param->modifiers = VKD3DSPSM_NEG; + + sm1_generate_vsir_block(ctx, &iff->then_block, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) + return; + + sm1_generate_vsir_block(ctx, &iff->else_block, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) + return; +} + +static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct 
hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + sm1_generate_vsir_instr_constant(ctx, program, hlsl_ir_constant(instr)); + break; + + case HLSL_IR_EXPR: + sm1_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + sm1_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: + sm1_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: + sm1_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + sm1_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_STORE: + sm1_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + sm1_generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + break; + } } } -/* OBJECTIVE: Translate all the information from ctx and entry_func to the - * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() - * without relying on ctx and entry_func. 
*/ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) { struct vkd3d_shader_version version = {0}; struct vkd3d_bytecode_buffer buffer = {0}; + struct hlsl_block block; version.major = ctx->profile->major_version; version.minor = ctx->profile->minor_version; version.type = ctx->profile->type; - if (!vsir_program_init(program, NULL, &version, 0)) + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) { ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; @@ -6034,7 +7382,37 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl ctab->code = buffer.data; ctab->size = buffer.size; - sm1_generate_vsir_signature(ctx, program); + generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); + sm1_generate_vsir_constant_defs(ctx, program, &block); + sm1_generate_vsir_sampler_dcls(ctx, program, &block); + list_move_head(&entry_func->body.instrs, &block.instrs); + + sm1_generate_vsir_block(ctx, &entry_func->body, program); +} + +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program, so it can be used as input to tpf_compile() without relying + * on ctx and entry_func. 
*/ +static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program) +{ + struct vkd3d_shader_version version = {0}; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + generate_vsir_signature(ctx, program, entry_func); + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_signature(ctx, program, ctx->patch_constant_func); } static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, @@ -6337,16 +7715,23 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return true; } -int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) +static void process_entry_function(struct hlsl_ctx *ctx, + const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) { const struct hlsl_profile_info *profile = ctx->profile; + struct hlsl_block static_initializers, global_uniforms; struct hlsl_block *const body = &entry_func->body; struct recursive_call_ctx recursive_call_ctx; struct hlsl_ir_var *var; unsigned int i; - list_move_head(&body->instrs, &ctx->static_initializers.instrs); + if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) + return; + list_move_head(&body->instrs, &static_initializers.instrs); + + if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block)) + return; + list_move_head(&body->instrs, &global_uniforms.instrs); memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); @@ -6355,7 +7740,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx 
*ctx, struct hlsl_ir_function_decl *entry /* Avoid going into an infinite loop when processing call instructions. * lower_return() recurses into inferior calls. */ if (ctx->result) - return ctx->result; + return; if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) lower_ir(ctx, lower_f16tof32, body); @@ -6367,20 +7752,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body); - LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) - { - if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) - prepend_uniform_copy(ctx, body, var); - } - for (i = 0; i < entry_func->parameters.count; ++i) { var = entry_func->parameters.vars[i]; - if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type)) { prepend_uniform_copy(ctx, body, var); } + else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { + if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Patch constant function parameter \"%s\" cannot be uniform.", var->name); + else + prepend_uniform_copy(ctx, body, var); + } else { if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT @@ -6392,9 +7779,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (var->storage_modifiers & HLSL_STORAGE_IN) - prepend_input_var_copy(ctx, body, var); + prepend_input_var_copy(ctx, entry_func, var); if (var->storage_modifiers & HLSL_STORAGE_OUT) - append_output_var_copy(ctx, body, var); + append_output_var_copy(ctx, entry_func, var); } } if (entry_func->return_var) @@ -6403,24 +7790,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, 
"Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - append_output_var_copy(ctx, body, entry_func->return_var); - } - - for (i = 0; i < entry_func->attr_count; ++i) - { - const struct hlsl_attribute *attr = entry_func->attrs[i]; - - if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE) - parse_numthreads_attribute(ctx, attr); - else - hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, - "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); + append_output_var_copy(ctx, entry_func, entry_func->return_var); } - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); @@ -6496,29 +7868,69 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry while (hlsl_transform_ir(ctx, dce, body, NULL)); compute_liveness(ctx, entry_func); + mark_vars_usage(ctx); - if (TRACE_ON()) - rb_for_each_entry(&ctx->functions, dump_function, ctx); + calculate_resource_register_counts(ctx); - transform_derefs(ctx, mark_indexable_vars, body); + allocate_register_reservations(ctx, &ctx->extern_vars); + allocate_register_reservations(ctx, &entry_func->extern_vars); + allocate_semantic_registers(ctx, entry_func); +} - calculate_resource_register_counts(ctx); +int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) +{ + const struct hlsl_profile_info *profile = ctx->profile; + struct hlsl_block global_uniform_block; + struct hlsl_ir_var *var; + + parse_entry_function_attributes(ctx, entry_func); + if (ctx->result) + return ctx->result; - allocate_register_reservations(ctx); + if 
(profile->type == VKD3D_SHADER_TYPE_HULL) + validate_hull_shader_attributes(ctx, entry_func); + else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); + + hlsl_block_init(&global_uniform_block); + + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + prepend_uniform_copy(ctx, &global_uniform_block, var); + } + + process_entry_function(ctx, &global_uniform_block, entry_func); + if (ctx->result) + return ctx->result; + + if (profile->type == VKD3D_SHADER_TYPE_HULL) + { + process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); + if (ctx->result) + return ctx->result; + } + + hlsl_block_cleanup(&global_uniform_block); - allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { + mark_indexable_vars(ctx, entry_func); + allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); } else { allocate_buffers(ctx); - allocate_objects(ctx, HLSL_REGSET_TEXTURES); - allocate_objects(ctx, HLSL_REGSET_UAVS); + allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); + allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); } - allocate_semantic_registers(ctx); - allocate_objects(ctx, HLSL_REGSET_SAMPLERS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); + + if (TRACE_ON()) + rb_for_each_entry(&ctx->functions, dump_function, ctx); if (ctx->result) return ctx->result; @@ -6540,14 +7952,29 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry return ctx->result; } - result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); + result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context); vsir_program_cleanup(&program); 
vkd3d_shader_free_shader_code(&ctab); return result; } case VKD3D_SHADER_TARGET_DXBC_TPF: - return hlsl_sm4_write(ctx, entry_func, out); + { + uint32_t config_flags = vkd3d_shader_init_config_flags(); + struct vsir_program program; + int result; + + sm4_generate_vsir(ctx, entry_func, config_flags, &program); + if (ctx->result) + { + vsir_program_cleanup(&program); + return ctx->result; + } + + result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); + vsir_program_cleanup(&program); + return result; + } default: ERR("Unsupported shader target type %#x.\n", target_type); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index db4913b7c62..716adb15f08 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -1452,11 +1452,15 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) case HLSL_TYPE_UINT: case HLSL_TYPE_INT: - case HLSL_TYPE_BOOL: if (const_arg->value.u[k].u != 1) return false; break; + case HLSL_TYPE_BOOL: + if (const_arg->value.u[k].u != ~0) + return false; + break; + default: return false; } @@ -1514,6 +1518,20 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in res_node = mut_arg; break; + case HLSL_OP2_LOGIC_AND: + if (constant_is_zero(const_arg)) + res_node = &const_arg->node; + else if (constant_is_one(const_arg)) + res_node = mut_arg; + break; + + case HLSL_OP2_LOGIC_OR: + if (constant_is_zero(const_arg)) + res_node = mut_arg; + else if (constant_is_one(const_arg)) + res_node = &const_arg->node; + break; + default: break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 747238e2fee..b47f12d2188 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -19,6 +19,15 @@ #include "vkd3d_shader_private.h" #include "vkd3d_types.h" +struct vsir_transformation_context +{ + enum 
vkd3d_result result; + struct vsir_program *program; + uint64_t config_flags; + const struct vkd3d_shader_compile_info *compile_info; + struct vkd3d_shader_message_context *message_context; +}; + static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) { @@ -65,7 +74,8 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil } bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve) + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, + bool normalised_io) { memset(program, 0, sizeof(*program)); @@ -87,6 +97,9 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c } program->shader_version = *version; + program->cf_type = cf_type; + program->normalised_io = normalised_io; + program->normalised_hull_cp_io = normalised_io; return shader_instruction_array_init(&program->instructions, reserve); } @@ -117,26 +130,156 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( return NULL; } +void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + reg->type = reg_type; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->data_type = data_type; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[0].is_in_bounds = false; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[1].is_in_bounds = false; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx[2].is_in_bounds = false; + reg->idx_count = idx_count; + reg->dimension = VSIR_DIMENSION_SCALAR; + reg->alignment = 0; +} + static inline bool shader_register_is_phase_instance_id(const 
struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; } -static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) +void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) { - enum vkd3d_shader_opcode opcode = instruction->opcode; - return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) - || opcode == VKD3DSIH_HS_DECLS; + vsir_register_init(&param->reg, reg_type, data_type, idx_count); + param->swizzle = 0; + param->modifiers = VKD3DSPSM_NONE; } -static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) { - struct vkd3d_shader_location location = ins->location; + vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); + src->reg.u.immconst_u32[0] = value; +} - vsir_instruction_init(ins, &location, VKD3DSIH_NOP); +void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) +{ + vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); + param->reg.dimension = VSIR_DIMENSION_NONE; + param->reg.idx[0].offset = label_id; +} + +static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) +{ + vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); + src->reg.idx[0].offset = idx; +} + +static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2); + src->reg.idx[0].offset = id; + src->reg.idx[1].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; +} + +static void vsir_src_param_init_sampler(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) +{ +
vsir_src_param_init(src, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 2); + src->reg.idx[0].offset = id; + src->reg.idx[1].offset = idx; + src->reg.dimension = VSIR_DIMENSION_NONE; +} + +static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_float4(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + src->reg.idx[0].offset = idx; +} + +void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + vsir_register_init(&param->reg, reg_type, data_type, idx_count); + param->write_mask = VKD3DSP_WRITEMASK_0; + param->modifiers = VKD3DSPDM_NONE; + param->shift = 0; +} + +static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + +static void
dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); + dst->reg.idx[0].offset = idx; +} + +static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + +static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + dst->reg.idx[0].offset = idx; +} + +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode) +{ + memset(ins, 0, sizeof(*ins)); + ins->location = *location; + ins->opcode = opcode; } -static bool vsir_instruction_init_with_params(struct vsir_program *program, +bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) { @@ -161,6 +304,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } +static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, + const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program) +{ + struct vkd3d_shader_src_param *src_param; + + if (!(src_param = vsir_program_get_src_params(program, 1))) + return false; + + vsir_src_param_init_label(src_param, label_id); + + vsir_instruction_init(ins, location, VKD3DSIH_LABEL); + ins->src = src_param; + ins->src_count = 1; + + return true; +} + +static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_opcode opcode = instruction->opcode; + return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) + || opcode == VKD3DSIH_HS_DECLS; +} + +static void 
vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_location location = ins->location; + + vsir_instruction_init(ins, &location, VKD3DSIH_NOP); +} + static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, enum vkd3d_shader_opcode *opcode, bool *requires_swap) { @@ -441,10 +615,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog return VKD3D_OK; } +static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex) +{ + unsigned int idx = tex->src[1].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs; + + VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); + VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr); + + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + srcs[0] = tex->src[0]; + vsir_src_param_init_resource(&srcs[1], idx, idx); + vsir_src_param_init_sampler(&srcs[2], idx, idx); + + tex->opcode = VKD3DSIH_SAMPLE; + tex->src = srcs; + tex->src_count = 3; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, + struct vkd3d_shader_instruction *texldd) +{ + unsigned int idx = texldd->src[1].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs; + + VKD3D_ASSERT(texldd->src[1].reg.idx_count == 1); + VKD3D_ASSERT(!texldd->src[1].reg.idx[0].rel_addr); + + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + srcs[0] = texldd->src[0]; + vsir_src_param_init_resource(&srcs[1], idx, idx); + vsir_src_param_init_sampler(&srcs[2], idx, idx); + srcs[3] = texldd->src[2]; + srcs[4] = texldd->src[3]; + + texldd->opcode = VKD3DSIH_SAMPLE_GRAD; + texldd->src = srcs; + texldd->src_count = 5; + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, - struct 
vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_message_context *message_context = ctx->message_context; unsigned int tmp_idx = ~0u, i; enum vkd3d_result ret; @@ -473,6 +695,8 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr case VKD3DSIH_DCL_CONSTANT_BUFFER: case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_THREAD_GROUP: + case VKD3DSIH_DCL_UAV_TYPED: vkd3d_shader_instruction_make_nop(ins); break; @@ -481,6 +705,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return ret; break; + case VKD3DSIH_TEX: + if ((ret = vsir_program_lower_tex(program, ins)) < 0) + return ret; + break; + + case VKD3DSIH_TEXLDD: + if ((ret = vsir_program_lower_texldd(program, ins)) < 0) + return ret; + break; + + case VKD3DSIH_TEXBEM: + case VKD3DSIH_TEXBEML: + case VKD3DSIH_TEXCOORD: + case VKD3DSIH_TEXDEPTH: + case VKD3DSIH_TEXDP3: + case VKD3DSIH_TEXDP3TEX: + case VKD3DSIH_TEXLDL: + case VKD3DSIH_TEXM3x2PAD: + case VKD3DSIH_TEXM3x2TEX: + case VKD3DSIH_TEXM3x3DIFF: + case VKD3DSIH_TEXM3x3PAD: + case VKD3DSIH_TEXM3x3SPEC: + case VKD3DSIH_TEXM3x3TEX: + case VKD3DSIH_TEXM3x3VSPEC: + case VKD3DSIH_TEXREG2AR: + case VKD3DSIH_TEXREG2GB: + case VKD3DSIH_TEXREG2RGB: + vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to unimplemented feature: Combined sampler instruction %#x.", + ins->opcode); + return VKD3D_ERROR_NOT_IMPLEMENTED; + default: break; } @@ -523,7 +779,7 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i } for (i = 0; i < ins->dst_count; ++i) - shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); + shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); } static const struct 
vkd3d_shader_varying_map *find_varying_map( @@ -541,9 +797,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( } static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { - const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; + struct vkd3d_shader_message_context *message_context = ctx->message_context; + const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; struct shader_signature *signature = &program->output_signature; const struct vkd3d_shader_varying_map_info *varying_map; unsigned int i; @@ -727,144 +985,20 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali return VKD3D_OK; } -void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) +static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - reg->type = reg_type; - reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - reg->non_uniform = false; - reg->data_type = data_type; - reg->idx[0].offset = ~0u; - reg->idx[0].rel_addr = NULL; - reg->idx[0].is_in_bounds = false; - reg->idx[1].offset = ~0u; - reg->idx[1].rel_addr = NULL; - reg->idx[1].is_in_bounds = false; - reg->idx[2].offset = ~0u; - reg->idx[2].rel_addr = NULL; - reg->idx[2].is_in_bounds = false; - reg->idx_count = idx_count; - reg->dimension = VSIR_DIMENSION_SCALAR; - reg->alignment = 0; -} + struct hull_flattener flattener = {program->instructions}; + struct vkd3d_shader_instruction_array *instructions; + struct shader_phase_location_array locations; + enum vkd3d_result result = 
VKD3D_OK; + unsigned int i; -void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) -{ - vsir_register_init(&param->reg, reg_type, data_type, idx_count); - param->swizzle = 0; - param->modifiers = VKD3DSPSM_NONE; -} + instructions = &flattener.instructions; -void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) -{ - vsir_register_init(&param->reg, reg_type, data_type, idx_count); - param->write_mask = VKD3DSP_WRITEMASK_0; - param->modifiers = VKD3DSPDM_NONE; - param->shift = 0; -} - -void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) -{ - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); - param->reg.dimension = VSIR_DIMENSION_NONE; - param->reg.idx[0].offset = label_id; -} - -static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - src->reg.idx[0].offset = idx; -} - -static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); - src->reg.idx[0].offset = idx; -} - -static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -{ - vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; -} - -static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -{ - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; -} - -static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) -{ - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - dst->reg.idx[0].offset = idx; - dst->write_mask = VKD3DSP_WRITEMASK_0; -} - -static void
src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src->reg.idx[0].offset = idx; -} - -static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - src->reg.idx[0].offset = idx; -} - -static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) -{ - vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); - src->reg.u.immconst_u32[0] = value; -} - -static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -{ - vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); - src->reg.idx[0].offset = idx; -} - -void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode) -{ - memset(ins, 0, sizeof(*ins)); - ins->location = *location; - ins->opcode = opcode; -} - -static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, - const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program) -{ - struct vkd3d_shader_src_param *src_param; - - if (!(src_param = vsir_program_get_src_params(program, 1))) - return false; - - vsir_src_param_init_label(src_param, label_id); - - vsir_instruction_init(ins, location, VKD3DSIH_LABEL); - ins->src = src_param; - ins->src_count = 1; - - return true; -} - -static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) -{ - struct hull_flattener flattener = {*src_instructions}; - struct vkd3d_shader_instruction_array *instructions; - struct shader_phase_location_array locations; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - - instructions = &flattener.instructions; - - flattener.phase = VKD3DSIH_INVALID; - for (i = 0, locations.count = 0; i < 
instructions->count; ++i) - flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + flattener.phase = VKD3DSIH_INVALID; + for (i = 0, locations.count = 0; i < instructions->count; ++i) + flattener_eliminate_phase_related_dcls(&flattener, i, &locations); if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) return result; @@ -876,7 +1010,7 @@ static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); } - *src_instructions = flattener.instructions; + program->instructions = flattener.instructions; return result; } @@ -892,9 +1026,9 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } -struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions) +struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program) { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; struct vkd3d_shader_src_param *rel_addr; if (instructions->outpointid_param) @@ -991,7 +1125,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p } static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( - struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) + struct vsir_program *program, struct vsir_transformation_context *ctx) { struct vkd3d_shader_instruction_array *instructions; struct control_point_normaliser normaliser; @@ -1001,12 +1135,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j; - if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) + VKD3D_ASSERT(!program->normalised_hull_cp_io); + 
+ if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + { + program->normalised_hull_cp_io = true; + return VKD3D_OK; + } + + if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) { ERR("Failed to allocate src param.\n"); return VKD3D_ERROR_OUT_OF_MEMORY; } - normaliser.instructions = *src_instructions; + normaliser.instructions = program->instructions; instructions = &normaliser.instructions; normaliser.phase = VKD3DSIH_INVALID; @@ -1043,22 +1185,25 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i input_control_point_count = ins->declaration.count; break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; + program->normalised_hull_cp_io = true; return VKD3D_OK; case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: /* ins may be relocated if the instruction array expands. */ location = ins->location; - ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, + ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, input_control_point_count, i, &location); - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; + program->normalised_hull_cp_io = true; return ret; default: break; } } - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; + program->normalised_hull_cp_io = true; return VKD3D_OK; } @@ -1098,36 +1243,35 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } -static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, - unsigned int reg_idx, unsigned int write_mask) +static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) { 
- unsigned int i, base_write_mask; + const struct signature_element *e; + unsigned int i; for (i = 0; i < signature->element_count; ++i) { - struct signature_element *e = &signature->elements[i]; - if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx + e = &signature->elements[i]; + if (e->register_index <= reg_idx && e->register_count > reg_idx - e->register_index && (e->mask & write_mask) == write_mask) { - return i; + *element_idx = i; + return true; } } - /* Validated in the TPF reader, but failure in signature_element_range_expand_mask() - * can land us here on an unmatched vector mask. */ - FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n", - reg_idx, write_mask); - base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); - if (base_write_mask != write_mask) - return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask); - - vkd3d_unreachable(); + return false; } struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask) { - return &signature->elements[shader_signature_find_element_for_reg(signature, reg_idx, write_mask)]; + unsigned int element_idx; + + if (shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + return &signature->elements[element_idx]; + + return NULL; } static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], @@ -1181,9 +1325,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; const struct vkd3d_shader_register *reg = &range->dst.reg; - unsigned int reg_idx, write_mask, element_idx; const struct shader_signature *signature; uint8_t (*range_map)[VKD3D_VEC4_SIZE]; + struct signature_element *element; + unsigned int reg_idx, write_mask; switch (reg->type) { @@ -1215,9 +1360,8 
@@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, reg_idx = reg->idx[reg->idx_count - 1].offset; write_mask = range->dst.write_mask; - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); - range_map_set_register_range(range_map, reg_idx, range->register_count, - signature->elements[element_idx].mask, true); + element = vsir_signature_find_element_for_reg(signature, reg_idx, write_mask); + range_map_set_register_range(range_map, reg_idx, range->register_count, element->mask, true); } static int signature_element_mask_compare(const void *a, const void *b) @@ -1345,6 +1489,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map return false; memcpy(elements, s->elements, element_count * sizeof(*elements)); + for (i = 0; i < element_count; ++i) + elements[i].sort_index = i; + qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) @@ -1388,6 +1535,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map else e->interpolation_mode = f->interpolation_mode; } + + vkd3d_free((void *)f->semantic_name); } } element_count = new_count; @@ -1415,6 +1564,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); e->register_count = register_count; e->mask = signature_element_range_expand_mask(e, register_count, range_map); + + for (j = 1; j < register_count; ++j) + { + f = &elements[i + j]; + vkd3d_free((void *)f->semantic_name); + } } } element_count = new_count; @@ -1518,6 +1673,9 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par break; case VKD3DSPR_RASTOUT: + /* Leave point size as a system value for the backends to consume. 
*/ + if (reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + return true; reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; @@ -1530,10 +1688,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par id_idx = reg->idx_count - 1; write_mask = dst_param->write_mask; - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + vkd3d_unreachable(); e = &signature->elements[element_idx]; - dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask); if (is_io_dcl) { /* Validated in the TPF reader. */ @@ -1653,7 +1811,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par id_idx = reg->idx_count - 1; write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0); - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + vkd3d_unreachable(); e = &signature->elements[element_idx]; if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) @@ -1737,13 +1896,13 @@ static bool use_flat_interpolation(const struct vsir_program *program, if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Unsupported flat interpolation parameter type %#x.\n", parameter->type); + "Unsupported flat interpolation parameter type %#x.", parameter->type); return false; } if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); + "Invalid flat interpolation parameter data type 
%#x.", parameter->data_type); return false; } @@ -1751,12 +1910,14 @@ static bool use_flat_interpolation(const struct vsir_program *program, } static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; struct io_normaliser normaliser = {program->instructions}; struct vkd3d_shader_instruction *ins; - bool has_control_point_phase; - unsigned int i, j; + unsigned int i; + + VKD3D_ASSERT(!program->normalised_io); normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; @@ -1765,7 +1926,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_signature = &program->output_signature; normaliser.patch_constant_signature = &program->patch_constant_signature; - for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { ins = &program->instructions.elements[i]; @@ -1779,8 +1940,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program vkd3d_shader_instruction_make_nop(ins); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: - has_control_point_phase = true; - /* fall through */ case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: normaliser.phase = ins->opcode; @@ -1790,22 +1949,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program } } - if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) - { - /* Inputs and outputs must match for the default phase, so merge ranges must match too. 
*/ - for (i = 0; i < MAX_REG_OUTPUT; ++i) - { - for (j = 0; j < VKD3D_VEC4_SIZE; ++j) - { - if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) - normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; - else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) - normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; - else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); - } - } - } - if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) @@ -1832,6 +1975,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program program->instructions = normaliser.instructions; program->use_vocp = normaliser.use_vocp; + program->normalised_io = true; return VKD3D_OK; } @@ -1918,7 +2062,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par param->reg.idx_count = 3; } -static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_program *program) +static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_program *program, + struct vsir_transformation_context *ctx) { struct flat_constants_normaliser normaliser = {0}; unsigned int i, j; @@ -1957,7 +2102,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ return VKD3D_OK; } -static void remove_dead_code(struct vsir_program *program) +static enum vkd3d_result vsir_program_remove_dead_code(struct vsir_program *program, + struct vsir_transformation_context *ctx) { size_t i, depth = 0; bool dead = false; @@ -2045,103 +2191,6 @@ static void remove_dead_code(struct vsir_program *program) break; } } -} - -static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program 
*program, - struct vkd3d_shader_message_context *message_context) -{ - unsigned int i; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - - switch (ins->opcode) - { - case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 3); - - ins->opcode = VKD3DSIH_SAMPLE; - - srcs[0] = ins->src[0]; - - srcs[1].reg.type = VKD3DSPR_RESOURCE; - srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx_count = 2; - srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; - srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; - srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; - - srcs[2].reg.type = VKD3DSPR_SAMPLER; - srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx_count = 2; - srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; - - ins->src = srcs; - ins->src_count = 3; - break; - - case VKD3DSIH_TEXLDD: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 5); - - ins->opcode = VKD3DSIH_SAMPLE_GRAD; - - srcs[0] = ins->src[0]; - - srcs[1].reg.type = VKD3DSPR_RESOURCE; - srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx_count = 2; - srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; - srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; - srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; - - srcs[2].reg.type = VKD3DSPR_SAMPLER; - srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx_count = 2; - srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; - - srcs[3] = ins->src[2]; - srcs[4] = ins->src[3]; - - ins->src = srcs; - ins->src_count = 5; - break; - - case VKD3DSIH_TEXBEM: - 
case VKD3DSIH_TEXBEML: - case VKD3DSIH_TEXCOORD: - case VKD3DSIH_TEXDEPTH: - case VKD3DSIH_TEXDP3: - case VKD3DSIH_TEXDP3TEX: - case VKD3DSIH_TEXLDL: - case VKD3DSIH_TEXM3x2PAD: - case VKD3DSIH_TEXM3x2TEX: - case VKD3DSIH_TEXM3x3DIFF: - case VKD3DSIH_TEXM3x3PAD: - case VKD3DSIH_TEXM3x3SPEC: - case VKD3DSIH_TEXM3x3TEX: - case VKD3DSIH_TEXM3x3VSPEC: - case VKD3DSIH_TEXREG2AR: - case VKD3DSIH_TEXREG2GB: - case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "Combined sampler instruction %#x.", ins->opcode); - return VKD3D_ERROR_NOT_IMPLEMENTED; - - default: - break; - } - } return VKD3D_OK; } @@ -2789,11 +2838,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte } static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; struct cf_flattener flattener = {.program = program}; enum vkd3d_result result; + VKD3D_ASSERT(program->cf_type == VSIR_CF_STRUCTURED); + if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { vkd3d_free(program->instructions.elements); @@ -2801,6 +2853,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; program->block_count = flattener.block_id; + program->cf_type = VSIR_CF_BLOCKS; } else { @@ -2860,13 +2913,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i return true; } -static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) +static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vsir_program *program, + 
struct vsir_transformation_context *ctx) { unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label; size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0; struct vkd3d_shader_instruction *instructions = NULL; struct lower_switch_to_if_ladder_block_mapping *block_map = NULL; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) goto fail; @@ -3050,7 +3106,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl vkd3d_free(block_info); } -static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) +static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program, + struct vsir_transformation_context *ctx) { size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; struct ssas_to_temps_block_info *info, *block_info = NULL; @@ -3058,6 +3115,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ struct ssas_to_temps_alloc alloc = {0}; unsigned int current_label = 0; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { ERR("Failed to allocate block info array.\n"); @@ -5271,12 +5330,15 @@ out: } static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; struct vsir_cfg_emit_target target = {0}; enum vkd3d_result ret; size_t i; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + target.jump_target_temp_idx = program->temp_count; target.temp_count = program->temp_count + 1; @@ -5324,6 +5386,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, 
program->instructions.capacity = target.ins_capacity; program->instructions.count = target.ins_count; program->temp_count = target.temp_count; + program->cf_type = VSIR_CF_STRUCTURED; return VKD3D_OK; @@ -5451,11 +5514,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f } static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; enum vkd3d_result ret; size_t i; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + for (i = 0; i < program->instructions.count;) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; @@ -5508,9 +5574,11 @@ static bool find_colour_signature_idx(const struct shader_signature *signature, static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, - const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) + const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, + uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; + static const struct vkd3d_shader_location no_loc; size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; @@ -5565,6 +5633,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); break; + case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4: + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, + "Alpha test reference data type must be a single component."); + return 
VKD3D_ERROR_INVALID_ARGUMENT; + default: FIXME("Unhandled parameter data type %#x.\n", ref->data_type); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -5596,8 +5669,9 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr } static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; static const struct vkd3d_shader_location no_loc; enum vkd3d_shader_comparison_func compare_func; @@ -5620,13 +5694,13 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Unsupported alpha test function parameter type %#x.\n", func->type); + "Unsupported alpha test function parameter type %#x.", func->type); return VKD3D_ERROR_NOT_IMPLEMENTED; } if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, - "Invalid alpha test function parameter data type %#x.\n", func->data_type); + "Invalid alpha test function parameter data type %#x.", func->data_type); return VKD3D_ERROR_INVALID_ARGUMENT; } compare_func = func->u.immediate_constant.u.u32; @@ -5650,7 +5724,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro if (ins->opcode == VKD3DSIH_RET) { if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, - ref, colour_signature_idx, colour_temp, &new_pos)) < 0) + ref, colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) return ret; i = new_pos; continue; @@ -5677,456 +5751,1845 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro return 
VKD3D_OK; } -struct validation_context +static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, uint32_t mask, uint32_t position_signature_idx, + uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos) { - struct vkd3d_shader_message_context *message_context; - const struct vsir_program *program; - size_t instruction_idx; - struct vkd3d_shader_location null_location; - bool invalid_instruction_idx; - enum vkd3d_result status; - bool dcl_temps_found; - enum vkd3d_shader_opcode phase; - enum cf_type - { - CF_TYPE_UNKNOWN = 0, - CF_TYPE_STRUCTURED, - CF_TYPE_BLOCKS, - } cf_type; - bool inside_block; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int output_idx = 0; - struct validation_context_temp_data - { - enum vsir_dimension dimension; - size_t first_seen; - } *temps; + if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - struct validation_context_ssa_data + ins = &program->instructions.elements[pos]; + + for (unsigned int i = 0; i < 8; ++i) { - enum vsir_dimension dimension; - enum vkd3d_data_type data_type; - size_t first_seen; - uint32_t write_mask; - uint32_t read_mask; - size_t first_assigned; - } *ssas; + if (!(mask & (1u << i))) + continue; - enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -}; + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); + src_param_init_temp_float4(&ins->src[0], position_temp); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); + ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct 
validation_context *ctx, - enum vkd3d_shader_error error, const char *format, ...) -{ - struct vkd3d_string_buffer buf; - va_list args; + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + if (output_idx < 4) + ins->dst[0].reg.idx[0].offset = low_signature_idx; + else + ins->dst[0].reg.idx[0].offset = high_signature_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].write_mask = (1u << (output_idx % 4)); + ++output_idx; - vkd3d_string_buffer_init(&buf); + ++ins; + } - va_start(args, format); - vkd3d_string_buffer_vprintf(&buf, format, args); - va_end(args); + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = position_signature_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].write_mask = program->output_signature.elements[position_signature_idx].mask; + src_param_init_temp_float(&ins->src[0], position_temp); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; - if (ctx->invalid_instruction_idx) - { - vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); - ERR("VSIR validation error: %s\n", buf.buffer); - } - else + *ret_pos = pos + vkd3d_popcount(mask) + 1; + return VKD3D_OK; +} + +static bool find_sysval_signature_idx(const struct shader_signature *signature, + enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) +{ + for (unsigned int i = 0; i < signature->element_count; ++i) { - const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; - vkd3d_shader_error(ctx->message_context, &ins->location, error, - "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); - ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + if (signature->elements[i].sysval_semantic == sysval) + { + *idx = i; + 
return true; + } } - vkd3d_string_buffer_cleanup(&buf); - - if (!ctx->status) - ctx->status = VKD3D_ERROR_INVALID_SHADER; + return false; } -static void vsir_validate_src_param(struct validation_context *ctx, - const struct vkd3d_shader_src_param *src); - -static void vsir_validate_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) +static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - unsigned int i; - - if (reg->type >= VKD3DSPR_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", - reg->type); + struct shader_signature *signature = &program->output_signature; + unsigned int low_signature_idx = ~0u, high_signature_idx = ~0u; + const struct vkd3d_shader_parameter1 *mask_parameter = NULL; + struct signature_element *new_elements, *clip_element; + uint32_t position_signature_idx, position_temp, mask; + static const struct vkd3d_shader_location no_loc; + struct vkd3d_shader_instruction *ins; + unsigned int plane_count; + size_t new_pos; + int ret; - if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.", - reg->precision); + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) + return VKD3D_OK; - if (reg->data_type >= VKD3D_DATA_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.", - reg->data_type); + for (unsigned int i = 0; i < program->parameter_count; ++i) + { + const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - if (reg->dimension >= VSIR_DIMENSION_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.", - reg->dimension); + if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK) + mask_parameter = parameter; + } - if (reg->idx_count > 
ARRAY_SIZE(reg->idx)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.", - reg->idx_count); + if (!mask_parameter) + return VKD3D_OK; - for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) + if (mask_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { - const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; - if (reg->idx[i].rel_addr) - vsir_validate_src_param(ctx, param); + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported clip plane mask parameter type %#x.", mask_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + if (mask_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid clip plane mask parameter data type %#x.", mask_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; } + mask = mask_parameter->u.immediate_constant.u.u32; - switch (reg->type) + if (!mask) + return VKD3D_OK; + + for (unsigned int i = 0; i < signature->element_count; ++i) { - case VKD3DSPR_TEMP: + if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_CLIP_DISTANCE) { - struct validation_context_temp_data *data; + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, + "Clip planes cannot be used if the shader writes clip distance."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } - if (reg->idx_count != 1) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.", - reg->idx_count); - break; - } + if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, + "Shader does not write position."); + return VKD3D_ERROR_INVALID_SHADER; + } - if (reg->idx[0].rel_addr) - 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); + /* Append the clip plane signature indices. */ - if (reg->idx[0].offset >= ctx->program->temp_count) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", - reg->idx[0].offset, ctx->program->temp_count); - break; - } + plane_count = vkd3d_popcount(mask); - data = &ctx->temps[reg->idx[0].offset]; + if (!(new_elements = vkd3d_realloc(signature->elements, + (signature->element_count + 2) * sizeof(*signature->elements)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + signature->elements = new_elements; + + low_signature_idx = signature->element_count; + clip_element = &signature->elements[signature->element_count++]; + memset(clip_element, 0, sizeof(*clip_element)); + clip_element->sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE; + clip_element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + clip_element->register_count = 1; + clip_element->mask = vkd3d_write_mask_from_component_count(min(plane_count, 4)); + clip_element->used_mask = clip_element->mask; + clip_element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; + + if (plane_count > 4) + { + high_signature_idx = signature->element_count; + clip_element = &signature->elements[signature->element_count++]; + memset(clip_element, 0, sizeof(*clip_element)); + clip_element->sysval_semantic = VKD3D_SHADER_SV_CLIP_DISTANCE; + clip_element->semantic_index = 1; + clip_element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + clip_element->register_count = 1; + clip_element->mask = vkd3d_write_mask_from_component_count(plane_count - 4); + clip_element->used_mask = clip_element->mask; + clip_element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; + } + + /* We're going to be reading from the output position, so we need to go + * through the whole shader and convert it to a temp. 
*/ - if (reg->dimension == VSIR_DIMENSION_NONE) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register."); - break; - } + position_temp = program->temp_count++; - /* TEMP registers can be scalar or vec4, provided that - * each individual register always appears with the same - * dimension. */ - if (data->dimension == VSIR_DIMENSION_NONE) - { - data->dimension = reg->dimension; - data->first_seen = ctx->instruction_idx; - } - else if (data->dimension != reg->dimension) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: " - "it has already been seen with dimension %#x at instruction %zu.", - reg->dimension, data->dimension, data->first_seen); - } - break; + for (size_t i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; + + if (vsir_instruction_is_dcl(ins)) + continue; + + if (ins->opcode == VKD3DSIH_RET) + { + if ((ret = insert_clip_planes_before_ret(program, ins, mask, position_signature_idx, + position_temp, low_signature_idx, high_signature_idx, &new_pos)) < 0) + return ret; + i = new_pos; + continue; } - case VKD3DSPR_SSA: + for (size_t j = 0; j < ins->dst_count; ++j) { - struct validation_context_ssa_data *data; + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; - if (reg->idx_count != 1) + /* Note we run after I/O normalization. 
*/ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == position_signature_idx) { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.", - reg->idx_count); - break; + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = position_temp; } + } + } - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register."); + return VKD3D_OK; +} - if (reg->idx[0].offset >= ctx->program->ssa_count) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, - "SSA register index %u exceeds the maximum count %u.", - reg->idx[0].offset, ctx->program->ssa_count); - break; - } +static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, size_t *ret_pos) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; - data = &ctx->ssas[reg->idx[0].offset]; + if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - if (reg->dimension == VSIR_DIMENSION_NONE) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register."); - break; - } + ins = &program->instructions.elements[pos]; - /* SSA registers can be scalar or vec4, provided that each - * individual register always appears with the same - * dimension. 
*/ - if (data->dimension == VSIR_DIMENSION_NONE) - { - data->dimension = reg->dimension; - data->data_type = reg->data_type; - data->first_seen = ctx->instruction_idx; - } - else - { - if (data->dimension != reg->dimension) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: " - "it has already been seen with dimension %#x at instruction %zu.", - reg->dimension, data->dimension, data->first_seen); - - if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: " - "it has already been seen with data type %#x at instruction %zu.", - reg->data_type, data->data_type, data->first_seen); - } - break; - } + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); - case VKD3DSPR_LABEL: - if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", - reg->precision); + *ret_pos = pos + 1; + return VKD3D_OK; +} - if (reg->data_type != VKD3D_DATA_UNUSED) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", - reg->data_type); +static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + const struct vkd3d_shader_parameter1 *size_parameter = NULL; + static const struct vkd3d_shader_location no_loc; - if (reg->dimension != VSIR_DIMENSION_NONE) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.", - reg->dimension); + if 
(program->has_point_size) + return VKD3D_OK; - if (reg->idx_count != 1) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.", - reg->idx_count); - break; - } + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX + && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY + && program->shader_version.type != VKD3D_SHADER_TYPE_HULL + && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + return VKD3D_OK; - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register."); - - /* Index == 0 is invalid, but it is temporarily allowed - * for intermediate stages. Once we support validation - * dialects we can selectively check for that. */ - if (reg->idx[0].offset > ctx->program->block_count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, - "LABEL register index %u exceeds the maximum count %u.", - reg->idx[0].offset, ctx->program->block_count); - break; + for (unsigned int i = 0; i < program->parameter_count; ++i) + { + const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - case VKD3DSPR_NULL: - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.", - reg->idx_count); - break; + if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE) + size_parameter = parameter; + } - case VKD3DSPR_IMMCONST: - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.", - reg->idx_count); - break; + if (!size_parameter) + return VKD3D_OK; - case VKD3DSPR_IMMCONST64: - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.", - reg->idx_count); - break; + if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) + { + 
vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid point size parameter data type %#x.", size_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } - default: - break; + program->has_point_size = true; + + /* Append a point size write before each ret. */ + for (size_t i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + if (ins->opcode == VKD3DSIH_RET) + { + size_t new_pos; + int ret; + + if ((ret = insert_point_size_before_ret(program, ins, &new_pos)) < 0) + return ret; + i = new_pos; + } } + + return VKD3D_OK; } -static void vsir_validate_dst_param(struct validation_context *ctx, - const struct vkd3d_shader_dst_param *dst) +static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - vsir_validate_register(ctx, &dst->reg); + const struct vkd3d_shader_parameter1 *min_parameter = NULL, *max_parameter = NULL; + static const struct vkd3d_shader_location no_loc; - if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", - dst->write_mask); + if (!program->has_point_size) + return VKD3D_OK; - switch (dst->reg.dimension) - { - case VSIR_DIMENSION_SCALAR: - if (dst->write_mask != VKD3DSP_WRITEMASK_0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", - dst->write_mask); - break; + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX + && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY + && program->shader_version.type != VKD3D_SHADER_TYPE_HULL + && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + return VKD3D_OK; - case VSIR_DIMENSION_VEC4: - if (dst->write_mask == 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty 
write mask."); - break; + for (unsigned int i = 0; i < program->parameter_count; ++i) + { + const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - default: - if (dst->write_mask != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.", - dst->reg.dimension, dst->write_mask); - break; + if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN) + min_parameter = parameter; + else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX) + max_parameter = parameter; } - if (dst->modifiers & ~VKD3DSPDM_MASK) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", - dst->modifiers); + if (!min_parameter && !max_parameter) + return VKD3D_OK; - switch (dst->shift) + if (min_parameter && min_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) { - case 0: - case 1: - case 2: - case 3: - case 13: - case 14: - case 15: - break; + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid minimum point size parameter data type %#x.", min_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } - default: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", - dst->shift); + if (max_parameter && max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid maximum point size parameter data type %#x.", max_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; } - switch (dst->reg.type) + /* Replace writes to the point size by inserting a clamp before each write. 
*/ + + for (size_t i = 0; i < program->instructions.count; ++i) { - case VKD3DSPR_SSA: - if (dst->reg.idx[0].offset < ctx->program->ssa_count) - { - struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + const struct vkd3d_shader_location *loc; + unsigned int ssa_value; + bool clamp = false; - if (data->write_mask == 0) - { - data->write_mask = dst->write_mask; - data->first_assigned = ctx->instruction_idx; - } - else - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, - "SSA register is already assigned at instruction %zu.", - data->first_assigned); - } + if (vsir_instruction_is_dcl(ins)) + continue; + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. */ + if (dst->reg.type == VKD3DSPR_RASTOUT) + { + dst_param_init_ssa_float(dst, program->ssa_count); + ssa_value = program->ssa_count++; + clamp = true; } - break; + } - case VKD3DSPR_IMMCONST: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid IMMCONST register used as destination parameter."); - break; + if (!clamp) + continue; - case VKD3DSPR_IMMCONST64: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid IMMCONST64 register used as destination parameter."); - break; + if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) + return VKD3D_ERROR_OUT_OF_MEMORY; - default: - break; - } -} + loc = &program->instructions.elements[i].location; + ins = &program->instructions.elements[i + 1]; + + if (min_parameter) + { + vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MAX, 1, 2); + src_param_init_ssa_float(&ins->src[0], ssa_value); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, VKD3D_DATA_FLOAT); + if (max_parameter) + { + 
dst_param_init_ssa_float(&ins->dst[0], program->ssa_count); + ssa_value = program->ssa_count++; + } + else + { + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + } + ++ins; + ++i; + } + + if (max_parameter) + { + vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MIN, 1, 2); + src_param_init_ssa_float(&ins->src[0], ssa_value); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_DATA_FLOAT); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + + ++i; + } + } + + return VKD3D_OK; +} + +struct validation_context +{ + struct vkd3d_shader_message_context *message_context; + const struct vsir_program *program; + size_t instruction_idx; + struct vkd3d_shader_location null_location; + bool invalid_instruction_idx; + enum vkd3d_result status; + bool dcl_temps_found; + enum vkd3d_shader_opcode phase; + bool inside_block; + + struct validation_context_temp_data + { + enum vsir_dimension dimension; + size_t first_seen; + } *temps; + + struct validation_context_ssa_data + { + enum vsir_dimension dimension; + enum vkd3d_data_type data_type; + size_t first_seen; + uint32_t write_mask; + uint32_t read_mask; + size_t first_assigned; + } *ssas; + + enum vkd3d_shader_opcode *blocks; + size_t depth; + size_t blocks_capacity; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, + enum vkd3d_shader_error error, const char *format, ...) 
+{ + struct vkd3d_string_buffer buf; + va_list args; + + vkd3d_string_buffer_init(&buf); + + va_start(args, format); + vkd3d_string_buffer_vprintf(&buf, format, args); + va_end(args); + + if (ctx->invalid_instruction_idx) + { + vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); + WARN("VSIR validation error: %s\n", buf.buffer); + } + else + { + const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; + vkd3d_shader_error(ctx->message_context, &ins->location, error, + "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); + WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + } + + vkd3d_string_buffer_cleanup(&buf); + + if (!ctx->status) + ctx->status = VKD3D_ERROR_INVALID_SHADER; +} + +static void vsir_validate_register_without_indices(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->idx_count != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a register of type %#x.", + reg->idx_count, reg->type); +} + +static void vsir_validate_io_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + const struct shader_signature *signature; + bool has_control_point = false; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + signature = &ctx->program->input_signature; + + switch (ctx->program->shader_version.type) + { + case VKD3D_SHADER_TYPE_GEOMETRY: + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: + has_control_point = true; + break; + + default: + break; + } + break; + + case VKD3DSPR_OUTPUT: + switch (ctx->program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) + { + signature = &ctx->program->output_signature; + has_control_point = ctx->program->normalised_hull_cp_io; + } + else if (ctx->program->normalised_io) + { + signature = 
&ctx->program->output_signature; + has_control_point = true; + } + else + { + signature = &ctx->program->patch_constant_signature; + } + break; + + default: + signature = &ctx->program->output_signature; + break; + } + break; + + case VKD3DSPR_INCONTROLPOINT: + signature = &ctx->program->input_signature; + has_control_point = true; + break; + + case VKD3DSPR_OUTCONTROLPOINT: + signature = &ctx->program->output_signature; + has_control_point = true; + break; + + case VKD3DSPR_PATCHCONST: + signature = &ctx->program->patch_constant_signature; + break; + + default: + vkd3d_unreachable(); + } + + if (!ctx->program->normalised_io) + { + /* Indices are [register] or [control point, register]. Both are + * allowed to have a relative address. */ + unsigned int expected_idx_count = 1 + !!has_control_point; + + if (reg->idx_count != expected_idx_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a register of type %#x.", + reg->idx_count, reg->type); + return; + } + } + else + { + struct signature_element *element; + unsigned int expected_idx_count; + unsigned int signature_idx; + bool is_array = false; + + /* If the signature element is not an array, indices are + * [signature] or [control point, signature]. If the signature + * element is an array, indices are [array, signature] or + * [control point, array, signature]. In any case `signature' is + * not allowed to have a relative address, while the others are. 
+ */ + if (reg->idx_count < 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a register of type %#x.", + reg->idx_count, reg->type); + return; + } + + if (reg->idx[reg->idx_count - 1].rel_addr) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for the signature index of a register of type %#x.", + reg->type); + return; + } + + signature_idx = reg->idx[reg->idx_count - 1].offset; + + if (signature_idx >= signature->element_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Signature index %u exceeds the signature size %u in a register of type %#x.", + signature_idx, signature->element_count, reg->type); + return; + } + + element = &signature->elements[signature_idx]; + if (element->register_count > 1 || vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) + is_array = true; + + expected_idx_count = 1 + !!has_control_point + !!is_array; + + if (reg->idx_count != expected_idx_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a register of type %#x.", + reg->idx_count, reg->type); + return; + } + } +} + +static void vsir_validate_temp_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + struct validation_context_temp_data *data; + + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a TEMP register.", + reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a TEMP register."); + + if (reg->idx[0].offset >= ctx->program->temp_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "TEMP register index %u exceeds the maximum count %u.", + reg->idx[0].offset, ctx->program->temp_count); + return; + } + + data = &ctx->temps[reg->idx[0].offset]; + + if 
(reg->dimension == VSIR_DIMENSION_NONE) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension NONE for a TEMP register."); + return; + } + + /* TEMP registers can be scalar or vec4, provided that + * each individual register always appears with the same + * dimension. */ + if (data->dimension == VSIR_DIMENSION_NONE) + { + data->dimension = reg->dimension; + data->first_seen = ctx->instruction_idx; + } + else if (data->dimension != reg->dimension) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension %#x for a TEMP register: " + "it has already been seen with dimension %#x at instruction %zu.", + reg->dimension, data->dimension, data->first_seen); + } +} + +static void vsir_validate_rastout_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a RASTOUT register.", + reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a RASTOUT register."); + + if (reg->idx[0].offset >= 3) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid offset for a RASTOUT register."); +} + +static void vsir_validate_misctype_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a MISCTYPE register.", + reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a MISCTYPE register."); + + if (reg->idx[0].offset >= 2) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid offset for a MISCTYPE register."); +} + +static void vsir_validate_label_register(struct 
validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, + "Invalid precision %#x for a LABEL register.", reg->precision); + + if (reg->data_type != VKD3D_DATA_UNUSED) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid data type %#x for a LABEL register.", reg->data_type); + + if (reg->dimension != VSIR_DIMENSION_NONE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension %#x for a LABEL register.", reg->dimension); + + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a LABEL register.", reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a LABEL register."); + + /* Index == 0 is invalid, but it is temporarily allowed + * for intermediate stages. Once we support validation + * dialects we can selectively check for that. */ + if (reg->idx[0].offset > ctx->program->block_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "LABEL register index %u exceeds the maximum count %u.", + reg->idx[0].offset, ctx->program->block_count); +} + +static void vsir_validate_sampler_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, + "Invalid precision %#x for a SAMPLER register.", reg->precision); + + if (reg->data_type != VKD3D_DATA_UNUSED) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid data type %#x for a SAMPLER register.", reg->data_type); + + /* VEC4 is allowed in gather operations. 
*/ + if (reg->dimension == VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension SCALAR for a SAMPLER register."); + + if (reg->idx_count != 2) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a SAMPLER register.", reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for the descriptor index of a SAMPLER register."); +} + +static void vsir_validate_resource_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, + "Invalid precision %#x for a RESOURCE register.", reg->precision); + + if (reg->data_type != VKD3D_DATA_UNUSED) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid data type %#x for a RESOURCE register.", reg->data_type); + + if (reg->dimension != VSIR_DIMENSION_VEC4) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension %#x for a RESOURCE register.", reg->dimension); + + if (reg->idx_count != 2) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a RESOURCE register.", reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for the descriptor index of a RESOURCE register."); +} + +static void vsir_validate_uav_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, + "Invalid precision %#x for a UAV register.", + reg->precision); + + if (reg->data_type != VKD3D_DATA_UNUSED) + validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid data type %#x for a UAV register.", + reg->data_type); + + /* NONE is allowed in counter operations. */ + if (reg->dimension == VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension %#x for a UAV register.", + reg->dimension); + + if (reg->idx_count != 2) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a UAV register.", + reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for the descriptor index of a UAV register."); +} + +static void vsir_validate_ssa_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + struct validation_context_ssa_data *data; + + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a SSA register.", + reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a SSA register."); + + if (reg->idx[0].offset >= ctx->program->ssa_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "SSA register index %u exceeds the maximum count %u.", + reg->idx[0].offset, ctx->program->ssa_count); + return; + } + + data = &ctx->ssas[reg->idx[0].offset]; + + if (reg->dimension == VSIR_DIMENSION_NONE) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension NONE for a SSA register."); + return; + } + + /* SSA registers can be scalar or vec4, provided that each + * individual register always appears with the same + * dimension. 
*/ + if (data->dimension == VSIR_DIMENSION_NONE) + { + data->dimension = reg->dimension; + data->data_type = reg->data_type; + data->first_seen = ctx->instruction_idx; + } + else + { + if (data->dimension != reg->dimension) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid dimension %#x for a SSA register: " + "it has already been seen with dimension %#x at instruction %zu.", + reg->dimension, data->dimension, data->first_seen); + + if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid data type %#x for a SSA register: " + "it has already been seen with data type %#x at instruction %zu.", + reg->data_type, data->data_type, data->first_seen); + } +} + +static void vsir_validate_src_param(struct validation_context *ctx, + const struct vkd3d_shader_src_param *src); + +static void vsir_validate_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + unsigned int i; + + if (reg->type >= VKD3DSPR_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", + reg->type); + + if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.", + reg->precision); + + if (reg->data_type >= VKD3D_DATA_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.", + reg->data_type); + + if (reg->dimension >= VSIR_DIMENSION_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.", + reg->dimension); + + if (reg->idx_count > ARRAY_SIZE(reg->idx)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.", + reg->idx_count); + + for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) + { + const struct vkd3d_shader_src_param *param = 
reg->idx[i].rel_addr; + if (reg->idx[i].rel_addr) + vsir_validate_src_param(ctx, param); + } + + switch (reg->type) + { + case VKD3DSPR_TEMP: + vsir_validate_temp_register(ctx, reg); + break; + + case VKD3DSPR_INPUT: + vsir_validate_io_register(ctx, reg); + break; + + case VKD3DSPR_RASTOUT: + vsir_validate_rastout_register(ctx, reg); + break; + + case VKD3DSPR_OUTPUT: + vsir_validate_io_register(ctx, reg); + break; + + case VKD3DSPR_DEPTHOUT: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_MISCTYPE: + vsir_validate_misctype_register(ctx, reg); + break; + + case VKD3DSPR_LABEL: + vsir_validate_label_register(ctx, reg); + break; + + case VKD3DSPR_IMMCONST: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_IMMCONST64: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_NULL: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_SAMPLER: + vsir_validate_sampler_register(ctx, reg); + break; + + case VKD3DSPR_RESOURCE: + vsir_validate_resource_register(ctx, reg); + break; + + case VKD3DSPR_UAV: + vsir_validate_uav_register(ctx, reg); + break; + + case VKD3DSPR_INCONTROLPOINT: + vsir_validate_io_register(ctx, reg); + break; + + case VKD3DSPR_OUTCONTROLPOINT: + vsir_validate_io_register(ctx, reg); + break; + + case VKD3DSPR_PATCHCONST: + vsir_validate_io_register(ctx, reg); + break; + + case VKD3DSPR_DEPTHOUTGE: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_DEPTHOUTLE: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_SSA: + vsir_validate_ssa_register(ctx, reg); + break; + + default: + break; + } +} + +static void vsir_validate_dst_param(struct validation_context *ctx, + const struct vkd3d_shader_dst_param *dst) +{ + vsir_validate_register(ctx, &dst->reg); + + if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write 
mask %#x.", + dst->write_mask); + + switch (dst->reg.dimension) + { + case VSIR_DIMENSION_SCALAR: + if (dst->write_mask != VKD3DSP_WRITEMASK_0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", + dst->write_mask); + break; + + case VSIR_DIMENSION_VEC4: + if (dst->write_mask == 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask."); + break; + + default: + if (dst->write_mask != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.", + dst->reg.dimension, dst->write_mask); + break; + } + + if (dst->modifiers & ~VKD3DSPDM_MASK) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", + dst->modifiers); + + switch (dst->shift) + { + case 0: + case 1: + case 2: + case 3: + case 13: + case 14: + case 15: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", + dst->shift); + } + + switch (dst->reg.type) + { + case VKD3DSPR_SSA: + if (dst->reg.idx[0].offset < ctx->program->ssa_count) + { + struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; + + if (data->write_mask == 0) + { + data->write_mask = dst->write_mask; + data->first_assigned = ctx->instruction_idx; + } + else + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, + "SSA register is already assigned at instruction %zu.", + data->first_assigned); + } + } + break; + + case VKD3DSPR_IMMCONST: + case VKD3DSPR_IMMCONST64: + case VKD3DSPR_SAMPLER: + case VKD3DSPR_RESOURCE: + case VKD3DSPR_INPUT: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid %#x register used as destination parameter.", dst->reg.type); + break; + + case VKD3DSPR_PATCHCONST: + if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); + break; + + default: + break; + } +} static void vsir_validate_src_param(struct validation_context *ctx, const struct vkd3d_shader_src_param *src) { - vsir_validate_register(ctx, &src->reg); + vsir_validate_register(ctx, &src->reg); + + if (src->swizzle & ~0x03030303u) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", + src->swizzle); + + if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", + src->reg.dimension, src->swizzle); + + if (src->modifiers >= VKD3DSPSM_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", + src->modifiers); + + switch (src->reg.type) + { + case VKD3DSPR_SSA: + if (src->reg.idx[0].offset < ctx->program->ssa_count) + { + struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; + unsigned int i; + + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); + } + break; + + case VKD3DSPR_NULL: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid NULL register used as source parameter."); + break; + + case VKD3DSPR_OUTPUT: + if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL + || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid OUTPUT register used as source parameter."); + break; + + case VKD3DSPR_PATCHCONST: + if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN + && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "PATCHCONST register used as source 
parameters are only allowed in Hull and Domain Shaders."); + break; + + default: + break; + } +} + +static void vsir_validate_dst_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, unsigned int count) +{ + if (instruction->dst_count != count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, + "Invalid destination count %u for an instruction of type %#x, expected %u.", + instruction->dst_count, instruction->opcode, count); +} - if (src->swizzle & ~0x03030303u) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", - src->swizzle); +static void vsir_validate_src_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, unsigned int count) +{ + if (instruction->src_count != count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected %u.", + instruction->src_count, instruction->opcode, count); +} - if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", - src->reg.dimension, src->swizzle); +static bool vsir_validate_src_min_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, unsigned int count) +{ + if (instruction->src_count < count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected at least %u.", + instruction->src_count, instruction->opcode, count); + return false; + } - if (src->modifiers >= VKD3DSPSM_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", - src->modifiers); + return true; +} + +static bool vsir_validate_src_max_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, unsigned int count) +{ 
+ if (instruction->src_count > count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected at most %u.", + instruction->src_count, instruction->opcode, count); + return false; + } + + return true; +} + +enum vsir_signature_type +{ + SIGNATURE_TYPE_INPUT, + SIGNATURE_TYPE_OUTPUT, + SIGNATURE_TYPE_PATCH_CONSTANT, +}; + +static const char * const signature_type_names[] = +{ + [SIGNATURE_TYPE_INPUT] = "input", + [SIGNATURE_TYPE_OUTPUT] = "output", + [SIGNATURE_TYPE_PATCH_CONSTANT] = "patch constant", +}; + +#define PS_BIT (1u << VKD3D_SHADER_TYPE_PIXEL) +#define VS_BIT (1u << VKD3D_SHADER_TYPE_VERTEX) +#define GS_BIT (1u << VKD3D_SHADER_TYPE_GEOMETRY) +#define HS_BIT (1u << VKD3D_SHADER_TYPE_HULL) +#define DS_BIT (1u << VKD3D_SHADER_TYPE_DOMAIN) +#define CS_BIT (1u << VKD3D_SHADER_TYPE_COMPUTE) + +static const struct sysval_validation_data_element +{ + unsigned int input; + unsigned int output; + unsigned int patch_constant; + enum vkd3d_shader_component_type data_type; + unsigned int component_count; +} +sysval_validation_data[] = +{ + [VKD3D_SHADER_SV_POSITION] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, + VKD3D_SHADER_COMPONENT_FLOAT, 4}, + [VKD3D_SHADER_SV_CLIP_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, + VKD3D_SHADER_COMPONENT_FLOAT, 4}, + [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, + VKD3D_SHADER_COMPONENT_FLOAT, 4}, + [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + 
[VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, +}; + +static void vsir_validate_signature_element(struct validation_context *ctx, + const struct shader_signature *signature, enum vsir_signature_type signature_type, + unsigned int idx) +{ + const char *signature_type_name = signature_type_names[signature_type]; + const struct signature_element *element = &signature->elements[idx]; + bool integer_type = false; + + if (element->register_count == 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); + + if (element->mask == 0 || (element->mask & ~0xf)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); + + if (!vkd3d_bitmask_is_contiguous(element->mask)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Non-contiguous mask %#x.", + idx, signature_type_name, element->mask); + + /* Here we'd likely want to validate that the usage mask is a subset of the + * signature mask. Unfortunately the D3DBC parser sometimes violates this. + * For example I've seen a shader like this: + * ps_3_0 + * [...] + * dcl_texcoord0 v0 + * [...] + * texld r2.xyzw, v0.xyzw, s1.xyzw + * [...] + * + * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to + * compute the signature mask, but the texld instruction apparently uses all + * the components. Of course the last two components are ignored, but + * formally they seem to be used. So we end up with a signature element with + * mask .xy and usage mask .xyzw. 
+ * + * The correct fix would probably be to make the D3DBC parser aware of which + * components are really used for each instruction, but that would take some + * time. */ + if (element->used_mask & ~0xf) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid usage mask %#x.", + idx, signature_type_name, element->used_mask); + + switch (element->sysval_semantic) + { + case VKD3D_SHADER_SV_NONE: + case VKD3D_SHADER_SV_POSITION: + case VKD3D_SHADER_SV_CLIP_DISTANCE: + case VKD3D_SHADER_SV_CULL_DISTANCE: + case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: + case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: + case VKD3D_SHADER_SV_VERTEX_ID: + case VKD3D_SHADER_SV_PRIMITIVE_ID: + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_SAMPLE_INDEX: + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: + case VKD3D_SHADER_SV_TARGET: + case VKD3D_SHADER_SV_DEPTH: + case VKD3D_SHADER_SV_COVERAGE: + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: + case VKD3D_SHADER_SV_STENCIL_REF: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x.", + idx, signature_type_name, element->sysval_semantic); + break; + } + + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) + { + const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; + + if (data->input || data->output || data->patch_constant) + { + unsigned int mask; + + switch (signature_type) + { + case SIGNATURE_TYPE_INPUT: + mask = data->input; + break; + + case SIGNATURE_TYPE_OUTPUT: + mask = data->output; + break; + + case SIGNATURE_TYPE_PATCH_CONSTANT: 
+ mask = data->patch_constant; + break; + + default: + vkd3d_unreachable(); + } + + if (!(mask & (1u << ctx->program->shader_version.type))) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x.", + idx, signature_type_name, element->sysval_semantic); + } + + if (data->component_count != 0) + { + if (element->component_type != data->data_type) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid data type %#x for system value semantic %#x.", + idx, signature_type_name, element->component_type, element->sysval_semantic); + + if (vsir_write_mask_component_count(element->mask) > data->component_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid mask %#x for system value semantic %#x.", + idx, signature_type_name, element->mask, element->sysval_semantic); + } + } + + switch (element->component_type) + { + case VKD3D_SHADER_COMPONENT_INT: + case VKD3D_SHADER_COMPONENT_UINT: + integer_type = true; + break; + + case VKD3D_SHADER_COMPONENT_FLOAT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid component type %#x.", + idx, signature_type_name, element->component_type); + break; + } + + if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid minimum precision %#x.", + idx, signature_type_name, element->min_precision); + + if (element->interpolation_mode >= VKD3DSIM_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid interpolation mode %#x.", + idx, signature_type_name, element->interpolation_mode); + + if (integer_type && element->interpolation_mode != VKD3DSIM_NONE + && element->interpolation_mode != VKD3DSIM_CONSTANT) + 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", + idx, signature_type_name, element->interpolation_mode); +} + +static const unsigned int allowed_signature_phases[] = +{ + [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, + [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, + [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, +}; + +static void vsir_validate_signature(struct validation_context *ctx, + const struct shader_signature *signature, enum vsir_signature_type signature_type) +{ + unsigned int i; + + if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] + & (1u << ctx->program->shader_version.type))) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Unexpected %s signature.", signature_type_names[signature_type]); + + for (i = 0; i < signature->element_count; ++i) + vsir_validate_signature_element(ctx, signature, signature_type, i); +} + +static const char *name_from_cf_type(enum vsir_control_flow_type type) +{ + switch (type) + { + case VSIR_CF_STRUCTURED: + return "structured"; + case VSIR_CF_BLOCKS: + return "block-based"; + default: + vkd3d_unreachable(); + } +} + +static void vsir_validate_cf_type(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, enum vsir_control_flow_type expected_type) +{ + if (ctx->program->cf_type != expected_type) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", + instruction->opcode, name_from_cf_type(ctx->program->cf_type)); +} + +static void vsir_validator_push_block(struct validation_context *ctx, enum vkd3d_shader_opcode opcode) +{ + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + { + ctx->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + ctx->blocks[ctx->depth++] = opcode; +} + 
+static void vsir_validate_hull_shader_phase(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Phase instruction %#x is only valid in a hull shader.", + instruction->opcode); + if (ctx->depth != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Phase instruction %#x must appear to top level.", + instruction->opcode); + ctx->phase = instruction->opcode; + ctx->dcl_temps_found = false; +} + +static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + size_t i; + + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + vsir_validate_dst_count(ctx, instruction, 0); + + if (!vsir_validate_src_min_count(ctx, instruction, 1)) + return; + + if (vsir_register_is_label(&instruction->src[0].reg)) + { + /* Unconditional branch: parameters are jump label, + * optional merge label, optional continue label. */ + vsir_validate_src_max_count(ctx, instruction, 3); + + for (i = 0; i < instruction->src_count; ++i) + { + if (!vsir_register_is_label(&instruction->src[i].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", + instruction->src[i].reg.type); + } + } + else + { + /* Conditional branch: parameters are condition, true + * jump label, false jump label, optional merge label, + * optional continue label. 
*/ + vsir_validate_src_min_count(ctx, instruction, 3); + vsir_validate_src_max_count(ctx, instruction, 5); + + for (i = 1; i < instruction->src_count; ++i) + { + if (!vsir_register_is_label(&instruction->src[i].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", + instruction->src[i].reg.type); + } + } + + ctx->inside_block = false; +} + +static void vsir_validate_dcl_gs_instances(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.count || instruction->declaration.count > 32) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", + instruction->declaration.count); +} + +static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + /* Exclude non-finite values. */ + if (!(instruction->declaration.max_tessellation_factor >= 1.0f + && instruction->declaration.max_tessellation_factor <= 64.0f)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Max tessellation factor %f is invalid.", + instruction->declaration.max_tessellation_factor); +} + +static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", + instruction->declaration.primitive_type.type); +} + +static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.count || instruction->declaration.count > 32) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + 
"Output control point count %u is invalid.", + instruction->declaration.count); +} + +static void vsir_validate_dcl_output_topology(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", + instruction->declaration.primitive_type.type); +} + +static void vsir_validate_dcl_temps(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (ctx->dcl_temps_found) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, + "Duplicate DCL_TEMPS instruction."); + if (instruction->declaration.count > ctx->program->temp_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, + "Invalid DCL_TEMPS count %u, expected at most %u.", + instruction->declaration.count, ctx->program->temp_count); + ctx->dcl_temps_found = true; +} + +static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID + || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); +} + +static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.tessellator_output_primitive + || instruction->declaration.tessellator_output_primitive + > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator output primitive %#x is invalid.", + 
instruction->declaration.tessellator_output_primitive); +} + +static void vsir_validate_dcl_tessellator_partitioning(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.tessellator_partitioning + || instruction->declaration.tessellator_partitioning + > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator partitioning %#x is invalid.", + instruction->declaration.tessellator_partitioning); +} + +static void vsir_validate_dcl_vertices_out(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.count > 1024) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", + instruction->declaration.count); +} + +static void vsir_validate_else(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ELSE instruction doesn't terminate IF block."); + else + ctx->blocks[ctx->depth - 1] = VKD3DSIH_ELSE; +} + +static void vsir_validate_endif(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF + && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDIF instruction doesn't terminate IF/ELSE block."); + else + --ctx->depth; +} + +static void vsir_validate_endloop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != 
VKD3DSIH_LOOP) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDLOOP instruction doesn't terminate LOOP block."); + else + --ctx->depth; +} + +static void vsir_validate_endrep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDREP instruction doesn't terminate REP block."); + else + --ctx->depth; +} + +static void vsir_validate_endswitch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDSWITCH instruction doesn't terminate SWITCH block."); + else + --ctx->depth; +} + +static void vsir_validate_if(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_IF); +} + +static void vsir_validate_ifc(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_IF); +} + +static void vsir_validate_label(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register of type %#x in a LABEL instruction, expected LABEL.", + instruction->src[0].reg.type); + + if (ctx->inside_block) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + 
"Invalid LABEL instruction inside a block."); + ctx->inside_block = true; +} + +static void vsir_validate_loop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validate_src_count(ctx, instruction, ctx->program->shader_version.major <= 3 ? 2 : 0); + vsir_validator_push_block(ctx, VKD3DSIH_LOOP); +} + +static void vsir_validate_nop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ +} + +static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + unsigned int i, incoming_count; + + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + + vsir_validate_src_min_count(ctx, instruction, 2); + + if (instruction->src_count % 2 != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for a PHI instruction, it must be an even number.", + instruction->src_count); + incoming_count = instruction->src_count / 2; - switch (src->reg.type) + for (i = 0; i < incoming_count; ++i) { - case VKD3DSPR_SSA: - if (src->reg.idx[0].offset < ctx->program->ssa_count) - { - struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; - unsigned int i; + unsigned int value_idx = 2 * i; + unsigned int label_idx = 2 * i + 1; - for (i = 0; i < VKD3D_VEC4_SIZE; ++i) - data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); - } - break; + if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) + && !register_is_ssa(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for incoming %u of type %#x in PHI instruction, " + "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - default: - break; + if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.", + instruction->src[value_idx].reg.dimension, i); + + if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid label register for case %u of type %#x in PHI instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); } + + if (instruction->dst_count < 1) + return; + + if (!register_is_ssa(&instruction->dst[0].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid destination of type %#x in PHI instruction, expected SSA.", + instruction->dst[0].reg.type); + + if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid destination dimension %#x in PHI instruction, expected scalar.", + instruction->dst[0].reg.dimension); + + if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, + "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].modifiers); + + if (instruction->dst[0].shift != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, + "Invalid shift %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].shift); } -static void vsir_validate_dst_count(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_rep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { - if (instruction->dst_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, - "Invalid destination count %u for an instruction of type %#x, expected %u.", - instruction->dst_count, instruction->opcode, count); + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + 
vsir_validator_push_block(ctx, VKD3DSIH_REP); } -static void vsir_validate_src_count(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_ret(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { - if (instruction->src_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected %u.", - instruction->src_count, instruction->opcode, count); + ctx->inside_block = false; } -static bool vsir_validate_src_min_count(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_switch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { - if (instruction->src_count < count) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at least %u.", - instruction->src_count, instruction->opcode, count); - return false; - } - - return true; + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_SWITCH); } -static bool vsir_validate_src_max_count(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_switch_monolithic(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) { - if (instruction->src_count > count) - { + unsigned int i, case_count; + + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + + /* Parameters are source, default label, merge label and + * then pairs of constant value and case label. 
*/ + + if (!vsir_validate_src_min_count(ctx, instruction, 3)) + return; + + if (instruction->src_count % 2 != 1) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at most %u.", - instruction->src_count, instruction->opcode, count); - return false; - } + "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", + instruction->src_count); - return true; -} + if (!vsir_register_is_label(&instruction->src[1].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", + instruction->src[1].reg.type); -static const char *name_from_cf_type(enum cf_type type) -{ - switch (type) + if (!vsir_register_is_label(&instruction->src[2].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", + instruction->src[2].reg.type); + + case_count = (instruction->src_count - 3) / 2; + + for (i = 0; i < case_count; ++i) { - case CF_TYPE_STRUCTURED: - return "structured"; - case CF_TYPE_BLOCKS: - return "block-based"; - default: - vkd3d_unreachable(); + unsigned int value_idx = 3 + 2 * i; + unsigned int label_idx = 3 + 2 * i + 1; + + if (!register_is_constant(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for case %u of type %#x in monolithic SWITCH instruction, " + "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); + + if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid label register for case %u of type %#x in monolithic SWITCH instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); } + + ctx->inside_block = false; } -static void 
vsir_validate_cf_type(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) +struct vsir_validator_instruction_desc { - VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); - VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); - if (ctx->cf_type != expected_type) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", - instruction->opcode, name_from_cf_type(ctx->cf_type)); -} + unsigned int dst_param_count; + unsigned int src_param_count; + void (*validate)(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction); +}; + +static const struct vsir_validator_instruction_desc vsir_validator_instructions[] = +{ + [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, + [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_FORK_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, + [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, + [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, + [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, + [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, + [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, + [VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_tessellator_output_primitive}, + [VKD3DSIH_DCL_TESSELLATOR_PARTITIONING] = {0, 0, vsir_validate_dcl_tessellator_partitioning}, + [VKD3DSIH_DCL_VERTICES_OUT] = {0, 0, vsir_validate_dcl_vertices_out}, + [VKD3DSIH_ELSE] = {0, 0, vsir_validate_else}, + [VKD3DSIH_ENDIF] = {0, 0, 
vsir_validate_endif}, + [VKD3DSIH_ENDLOOP] = {0, 0, vsir_validate_endloop}, + [VKD3DSIH_ENDREP] = {0, 0, vsir_validate_endrep}, + [VKD3DSIH_ENDSWITCH] = {0, 0, vsir_validate_endswitch}, + [VKD3DSIH_IF] = {0, 1, vsir_validate_if}, + [VKD3DSIH_IFC] = {0, 2, vsir_validate_ifc}, + [VKD3DSIH_LABEL] = {0, 1, vsir_validate_label}, + [VKD3DSIH_LOOP] = {0, ~0u, vsir_validate_loop}, + [VKD3DSIH_NOP] = {0, 0, vsir_validate_nop}, + [VKD3DSIH_PHI] = {1, ~0u, vsir_validate_phi}, + [VKD3DSIH_REP] = {0, 1, vsir_validate_rep}, + [VKD3DSIH_RET] = {0, 0, vsir_validate_ret}, + [VKD3DSIH_SWITCH] = {0, 1, vsir_validate_switch}, + [VKD3DSIH_SWITCH_MONOLITHIC] = {0, ~0u, vsir_validate_switch_monolithic}, +}; static void vsir_validate_instruction(struct validation_context *ctx) { @@ -6148,136 +7611,40 @@ static void vsir_validate_instruction(struct validation_context *ctx) instruction->opcode); } - switch (instruction->opcode) + if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) { - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (version->type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Phase instruction %#x is only valid in a hull shader.", - instruction->opcode); - if (ctx->depth != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, - "Phase instruction %#x must appear to top level.", - instruction->opcode); - ctx->phase = instruction->opcode; - ctx->dcl_temps_found = false; - return; - - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - /* Exclude non-finite values. 
*/ - if (!(instruction->declaration.max_tessellation_factor >= 1.0f - && instruction->declaration.max_tessellation_factor <= 64.0f)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", - instruction->declaration.max_tessellation_factor); - return; - - case VKD3DSIH_DCL_INPUT_PRIMITIVE: - if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED - || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", - instruction->declaration.primitive_type.type); - return; - - case VKD3DSIH_DCL_VERTICES_OUT: - if (instruction->declaration.count > 1024) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", - instruction->declaration.count); - return; - - case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: - if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED - || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", - instruction->declaration.primitive_type.type); - return; - - case VKD3DSIH_DCL_GS_INSTANCES: - if (!instruction->declaration.count || instruction->declaration.count > 32) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", - instruction->declaration.count); - return; - - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - if (!instruction->declaration.count || instruction->declaration.count > 32) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", - instruction->declaration.count); - return; - - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID - || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) - validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); - return; - - case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: - if (!instruction->declaration.tessellator_output_primitive - || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); - return; - - case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: - if (!instruction->declaration.tessellator_partitioning - || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); - return; - - default: - break; - } - - /* Only DCL instructions may occur outside hull shader phases. */ - if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL - && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", - instruction->opcode); + switch (instruction->opcode) + { + case VKD3DSIH_NOP: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + break; - /* We support two different control flow types in shaders: - * block-based, like DXIL and SPIR-V, and structured, like D3DBC - * and TPF. The shader is detected as block-based when its first - * instruction, except for DCL_* and phases, is a LABEL. Currently - * we mandate that each shader is either purely block-based or - * purely structured. 
In principle we could allow structured - * constructs in a block, provided they are confined in a single - * block, but need for that hasn't arisen yet, so we don't. */ - if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) - { - if (instruction->opcode == VKD3DSIH_LABEL) - ctx->cf_type = CF_TYPE_BLOCKS; - else - ctx->cf_type = CF_TYPE_STRUCTURED; + default: + if (!vsir_instruction_is_dcl(instruction)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", + instruction->opcode); + break; + } } - if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) + if (ctx->program->cf_type == VSIR_CF_BLOCKS && !ctx->inside_block) { switch (instruction->opcode) { + case VKD3DSIH_NOP: case VKD3DSIH_LABEL: - if (ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block."); - ctx->inside_block = true; - break; - - case VKD3DSIH_RET: - case VKD3DSIH_BRANCH: - case VKD3DSIH_SWITCH_MONOLITHIC: - if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, - "Invalid instruction %#x outside any block.", - instruction->opcode); - ctx->inside_block = false; + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: break; default: - if (!ctx->inside_block) + if (!vsir_instruction_is_dcl(instruction)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", instruction->opcode); @@ -6285,271 +7652,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) } } - switch (instruction->opcode) + if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions)) { - case VKD3DSIH_DCL_TEMPS: - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->dcl_temps_found) - 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction."); - if (instruction->declaration.count > ctx->program->temp_count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, - "Invalid DCL_TEMPS count %u, expected at most %u.", - instruction->declaration.count, ctx->program->temp_count); - ctx->dcl_temps_found = true; - break; - - case VKD3DSIH_IF: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_IFC: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 2); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = VKD3DSIH_IF; - break; - - case VKD3DSIH_ELSE: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); - else - ctx->blocks[ctx->depth - 1] = instruction->opcode; - break; - - case VKD3DSIH_ENDIF: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block."); - else - --ctx->depth; - 
break; - - case VKD3DSIH_LOOP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDLOOP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block."); - else - --ctx->depth; - break; - - case VKD3DSIH_REP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDREP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block."); - else - --ctx->depth; - break; - - case VKD3DSIH_SWITCH: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDSWITCH: - 
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block."); - else - --ctx->depth; - break; - - case VKD3DSIH_RET: - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - break; - - case VKD3DSIH_LABEL: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register of type %#x in a LABEL instruction, expected LABEL.", - instruction->src[0].reg.type); - break; - - case VKD3DSIH_BRANCH: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - if (!vsir_validate_src_min_count(ctx, instruction, 1)) - break; - if (vsir_register_is_label(&instruction->src[0].reg)) - { - /* Unconditional branch: parameters are jump label, - * optional merge label, optional continue label. */ - vsir_validate_src_max_count(ctx, instruction, 3); - - for (i = 0; i < instruction->src_count; ++i) - { - if (!vsir_register_is_label(&instruction->src[i].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", - instruction->src[i].reg.type); - } - } - else - { - /* Conditional branch: parameters are condition, true - * jump label, false jump label, optional merge label, - * optional continue label. 
*/ - vsir_validate_src_min_count(ctx, instruction, 3); - vsir_validate_src_max_count(ctx, instruction, 5); - - for (i = 1; i < instruction->src_count; ++i) - { - if (!vsir_register_is_label(&instruction->src[i].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", - instruction->src[i].reg.type); - } - } - break; - - case VKD3DSIH_SWITCH_MONOLITHIC: - { - unsigned int case_count; - - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - /* Parameters are source, default label, merge label and - * then pairs of constant value and case label. */ - if (!vsir_validate_src_min_count(ctx, instruction, 3)) - break; - if (instruction->src_count % 2 != 1) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", - instruction->src_count); + const struct vsir_validator_instruction_desc *desc; - if (!vsir_register_is_label(&instruction->src[1].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", - instruction->src[1].reg.type); - - if (!vsir_register_is_label(&instruction->src[2].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", - instruction->src[2].reg.type); - - case_count = (instruction->src_count - 3) / 2; - - for (i = 0; i < case_count; ++i) - { - unsigned int value_idx = 3 + 2 * i; - unsigned int label_idx = 3 + 2 * i + 1; - - if (!register_is_constant(&instruction->src[value_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, " - "expected IMMCONST or 
IMMCONST64.", i, instruction->src[value_idx].reg.type); - - if (!vsir_register_is_label(&instruction->src[label_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, " - "expected LABEL.", i, instruction->src[value_idx].reg.type); - } - break; - } + desc = &vsir_validator_instructions[instruction->opcode]; - case VKD3DSIH_PHI: + if (desc->validate) { - unsigned int incoming_count; - - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 1); - vsir_validate_src_min_count(ctx, instruction, 2); - if (instruction->src_count % 2 != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for a PHI instruction, it must be an even number.", - instruction->src_count); - incoming_count = instruction->src_count / 2; - - if (!register_is_ssa(&instruction->dst[0].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid destination of type %#x in PHI instruction, expected SSA.", - instruction->dst[0].reg.type); - - if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, - "Invalid destination dimension %#x in PHI instruction, expected scalar.", - instruction->dst[0].reg.dimension); - - if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, - "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", - instruction->dst[0].modifiers); - - if (instruction->dst[0].shift != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, - "Invalid shift %#x for the destination of a PHI instruction, expected none.", - instruction->dst[0].shift); - - for (i = 0; i < incoming_count; ++i) - { - unsigned int value_idx = 2 * i; - unsigned int label_idx = 2 * i + 1; - - if 
(!register_is_constant_or_undef(&instruction->src[value_idx].reg) - && !register_is_ssa(&instruction->src[value_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid value register for incoming %zu of type %#x in PHI instruction, " - "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - - if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, - "Invalid value dimension %#x for incoming %zu in PHI instruction, expected scalar.", - instruction->src[value_idx].reg.dimension, i); - - if (!vsir_register_is_label(&instruction->src[label_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid label register for case %zu of type %#x in PHI instruction, " - "expected LABEL.", i, instruction->src[value_idx].reg.type); - } - break; + if (desc->dst_param_count != ~0u) + vsir_validate_dst_count(ctx, instruction, desc->dst_param_count); + if (desc->src_param_count != ~0u) + vsir_validate_src_count(ctx, instruction, desc->src_param_count); + desc->validate(ctx, instruction); } - - default: - break; } } @@ -6563,19 +7679,71 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .null_location = {.source_name = source_name}, .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, + .invalid_instruction_idx = true, }; unsigned int i; if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) return VKD3D_OK; + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: + break; + + default: + if (program->patch_constant_signature.element_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Patch constant signature is only valid for hull and domain shaders."); + } + + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: + case 
VKD3D_SHADER_TYPE_GEOMETRY: + if (program->input_control_point_count == 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid zero input control point count."); + break; + + default: + if (program->input_control_point_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid input control point count %u.", + program->input_control_point_count); + } + + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + if (program->output_control_point_count == 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid zero output control point count."); + break; + + default: + if (program->output_control_point_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid output control point count %u.", + program->output_control_point_count); + } + + vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); + vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); + vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); + if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) goto fail; if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; - for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) + ctx.invalid_instruction_idx = false; + + for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count + && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx) vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; @@ -6610,74 +7778,75 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +#define vsir_transform(ctx, step) 
vsir_transform_(ctx, #step, step) +static void vsir_transform_( + struct vsir_transformation_context *ctx, const char *step_name, + enum vkd3d_result (*step)(struct vsir_program *program, struct vsir_transformation_context *ctx)) { - enum vkd3d_result result = VKD3D_OK; - - if ((result = vsir_program_lower_instructions(program, message_context)) < 0) - return result; + if (ctx->result < 0) + return; - if (program->shader_version.major >= 6) + if ((ctx->result = step(ctx->program, ctx)) < 0) { - if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) - return result; + WARN("Transformation \"%s\" failed with result %d.\n", step_name, ctx->result); + return; + } - if ((result = lower_switch_to_if_ladder(program)) < 0) - return result; + if ((ctx->result = vsir_program_validate(ctx->program, ctx->config_flags, + ctx->compile_info->source_name, ctx->message_context)) < 0) + { + WARN("Validation failed with result %d after transformation \"%s\".\n", ctx->result, step_name); + return; + } +} - if ((result = vsir_program_structurize(program, message_context)) < 0) - return result; +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx = + { + .result = VKD3D_OK, + .program = program, + .config_flags = config_flags, + .compile_info = compile_info, + .message_context = message_context, + }; - if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + vsir_transform(&ctx, vsir_program_lower_instructions); - if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) - return result; + if (program->shader_version.major >= 6) + { + vsir_transform(&ctx, vsir_program_materialise_phi_ssas_to_temps); + vsir_transform(&ctx, vsir_program_lower_switch_to_selection_ladder); + 
vsir_transform(&ctx, vsir_program_structurize); + vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs); + vsir_transform(&ctx, vsir_program_materialize_undominated_ssas_to_temps); } else { if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - { - if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) - return result; - } + vsir_transform(&ctx, vsir_program_remap_output_signature); if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) - { - if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) - return result; - - if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, - &program->input_signature)) < 0) - return result; - } - - if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) - return result; - - if ((result = instruction_array_normalise_flat_constants(program)) < 0) - return result; - - remove_dead_code(program); + vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); - if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) - return result; + vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); + vsir_transform(&ctx, vsir_program_normalise_io_registers); + vsir_transform(&ctx, vsir_program_normalise_flat_constants); + vsir_transform(&ctx, vsir_program_remove_dead_code); if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL - && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + && compile_info->target_type != VKD3D_SHADER_TARGET_MSL) + vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs); } - if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) - return result; + vsir_transform(&ctx, vsir_program_insert_alpha_test); + vsir_transform(&ctx, vsir_program_insert_clip_planes); + vsir_transform(&ctx, vsir_program_insert_point_size); + 
vsir_transform(&ctx, vsir_program_insert_point_size_clamp); if (TRACE_ON()) - vkd3d_shader_trace(program); - - if ((result = vsir_program_validate(program, config_flags, - compile_info->source_name, message_context)) < 0) - return result; + vsir_program_trace(program); - return result; + return ctx.result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c new file mode 100644 index 00000000000..5baefbc1f44 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -0,0 +1,881 @@ +/* + * Copyright 2024 Feifan He for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +struct msl_src +{ + struct vkd3d_string_buffer *str; +}; + +struct msl_dst +{ + const struct vkd3d_shader_dst_param *vsir; + struct vkd3d_string_buffer *register_name; + struct vkd3d_string_buffer *mask; +}; + +struct msl_generator +{ + struct vsir_program *program; + struct vkd3d_string_buffer_cache string_buffers; + struct vkd3d_string_buffer *buffer; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; + unsigned int indent; + const char *prefix; + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, + enum vkd3d_shader_error error, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vkd3d_shader_verror(gen->message_context, &gen->location, error, fmt, args); + va_end(args); +} + +static const char *msl_get_prefix(enum vkd3d_shader_type type) +{ + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + return "vs"; + case VKD3D_SHADER_TYPE_HULL: + return "hs"; + case VKD3D_SHADER_TYPE_DOMAIN: + return "ds"; + case VKD3D_SHADER_TYPE_GEOMETRY: + return "gs"; + case VKD3D_SHADER_TYPE_PIXEL: + return "ps"; + case VKD3D_SHADER_TYPE_COMPUTE: + return "cs"; + default: + return NULL; + } +} + +static void msl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) +{ + vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); +} + +static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer, + struct msl_generator *gen, enum vkd3d_data_type data_type) +{ + vkd3d_string_buffer_printf(buffer, "."); + switch (data_type) + { + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "f"); + break; + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "i"); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "u"); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled register datatype %#x.", data_type); + vkd3d_string_buffer_printf(buffer, "", data_type); + break; + } +} + +static void msl_print_register_name(struct vkd3d_string_buffer *buffer, + struct msl_generator *gen, const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_TEMP: + vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + + case VKD3DSPR_INPUT: + if (reg->idx_count != 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled input register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + if (reg->idx[0].rel_addr) + { + 
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled input register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "v[%u]", reg->idx[0].offset); + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + + case VKD3DSPR_OUTPUT: + if (reg->idx_count != 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + if (reg->idx[0].rel_addr) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled output register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "o[%u]", reg->idx[0].offset); + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + + case VKD3DSPR_CONSTBUFFER: + if (reg->idx_count != 3) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "descriptors.cb_%u[%u]", reg->idx[0].offset, reg->idx[2].offset); + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled register type %#x.", reg->type); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } +} + +static void msl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t 
swizzle, uint32_t mask) +{ + const char swizzle_chars[] = "xyzw"; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "."); + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (mask & (VKD3DSP_WRITEMASK_0 << i)) + vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); + } +} + +static void msl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) +{ + vkd3d_string_buffer_printf(buffer, "."); + if (write_mask & VKD3DSP_WRITEMASK_0) + vkd3d_string_buffer_printf(buffer, "x"); + if (write_mask & VKD3DSP_WRITEMASK_1) + vkd3d_string_buffer_printf(buffer, "y"); + if (write_mask & VKD3DSP_WRITEMASK_2) + vkd3d_string_buffer_printf(buffer, "z"); + if (write_mask & VKD3DSP_WRITEMASK_3) + vkd3d_string_buffer_printf(buffer, "w"); +} + +static void msl_src_cleanup(struct msl_src *src, struct vkd3d_string_buffer_cache *cache) +{ + vkd3d_string_buffer_release(cache, src->str); +} + +static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) +{ + const struct vkd3d_shader_register *reg = &vsir_src->reg; + + msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + + if (reg->non_uniform) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + if (vsir_src->modifiers) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + + msl_print_register_name(msl_src->str, gen, reg); + if (reg->dimension == VSIR_DIMENSION_VEC4) + msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); +} + +static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) +{ + vkd3d_string_buffer_release(cache, dst->mask); + vkd3d_string_buffer_release(cache, dst->register_name); +} + +static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, + const 
struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) +{ + uint32_t write_mask = vsir_dst->write_mask; + + if (ins->flags & VKD3DSI_PRECISE_XYZW) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'precise' modifier."); + if (vsir_dst->reg.non_uniform) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + + msl_dst->vsir = vsir_dst; + msl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); + msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); + msl_print_write_mask(msl_dst->mask, write_mask); + + return write_mask; +} + +static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( + struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) +{ + va_list args; + + if (dst->vsir->shift) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); + if (dst->vsir->modifiers) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + + va_start(args, format); + vkd3d_string_buffer_vprintf(gen->buffer, format, args); + va_end(args); + + vkd3d_string_buffer_printf(gen->buffer, ";\n"); +} + +static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "/* */\n", ins->opcode); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled instruction %#x.", ins->opcode); +} + +static void msl_mov(struct msl_generator *gen, const struct 
vkd3d_shader_instruction *ins) +{ + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + msl_print_assignment(gen, &dst, "%s", src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "return;\n"); +} + +static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + gen->location = ins->location; + + switch (ins->opcode) + { + case VKD3DSIH_NOP: + break; + case VKD3DSIH_MOV: + msl_mov(gen, ins); + break; + case VKD3DSIH_RET: + msl_ret(gen, ins); + break; + default: + msl_unhandled(gen, ins); + break; + } +} + +static bool msl_check_shader_visibility(const struct msl_generator *gen, + enum vkd3d_shader_visibility visibility) +{ + enum vkd3d_shader_type t = gen->program->shader_version.type; + + switch (visibility) + { + case VKD3D_SHADER_VISIBILITY_ALL: + return true; + case VKD3D_SHADER_VISIBILITY_VERTEX: + return t == VKD3D_SHADER_TYPE_VERTEX; + case VKD3D_SHADER_VISIBILITY_HULL: + return t == VKD3D_SHADER_TYPE_HULL; + case VKD3D_SHADER_VISIBILITY_DOMAIN: + return t == VKD3D_SHADER_TYPE_DOMAIN; + case VKD3D_SHADER_VISIBILITY_GEOMETRY: + return t == VKD3D_SHADER_TYPE_GEOMETRY; + case VKD3D_SHADER_VISIBILITY_PIXEL: + return t == VKD3D_SHADER_TYPE_PIXEL; + case VKD3D_SHADER_VISIBILITY_COMPUTE: + return t == VKD3D_SHADER_TYPE_COMPUTE; + default: + WARN("Invalid shader visibility %#x.\n", visibility); + return false; + } +} + +static bool msl_get_cbv_binding(const struct msl_generator *gen, + unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) +{ + const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; + const struct 
vkd3d_shader_resource_binding *binding; + unsigned int i; + + if (!interface_info) + return false; + + for (i = 0; i < interface_info->binding_count; ++i) + { + binding = &interface_info->bindings[i]; + + if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) + continue; + if (binding->register_space != register_space) + continue; + if (binding->register_index != register_idx) + continue; + if (!msl_check_shader_visibility(gen, binding->shader_visibility)) + continue; + if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) + continue; + *binding_idx = i; + return true; + } + + return false; +} + +static void msl_generate_cbv_declaration(struct msl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *cbv) +{ + const struct vkd3d_shader_descriptor_binding *binding; + struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int binding_idx; + size_t size; + + if (cbv->count != 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); + return; + } + + if (!msl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "No descriptor binding specified for constant buffer %u.", cbv->register_id); + return; + } + + binding = &gen->interface_info->bindings[binding_idx].binding; + + if (binding->set != 0) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); + return; + } + + if (binding->count != 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); + return; + } + + size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); + size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); + + 
vkd3d_string_buffer_printf(buffer, + "constant vkd3d_vec4 (&cb_%u)[%zu] [[id(%u)]];", cbv->register_id, size, binding->binding); +}; + +static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) +{ + const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; + const struct vkd3d_shader_descriptor_info1 *descriptor; + struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int i; + + if (!info->descriptor_count) + return; + + vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_descriptors\n{\n", gen->prefix); + + for (i = 0; i < info->descriptor_count; ++i) + { + descriptor = &info->descriptors[i]; + + msl_print_indent(buffer, 1); + switch (descriptor->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + msl_generate_cbv_declaration(gen, descriptor); + break; + + default: + vkd3d_string_buffer_printf(buffer, "/* */", descriptor->type); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); + break; + } + vkd3d_string_buffer_printf(buffer, "\n"); + } + + vkd3d_string_buffer_printf(buffer, "};\n\n"); +} + +static void msl_generate_input_struct_declarations(struct msl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->input_signature; + enum vkd3d_shader_type type = gen->program->shader_version.type; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_in\n{\n", gen->prefix); + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + if (e->sysval_semantic) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); + continue; + } + + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + 
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); + continue; + } + + if (e->interpolation_mode != VKD3DSIM_NONE) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + continue; + } + + if(e->register_count > 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled register count %u.", e->register_count); + continue; + } + + msl_print_indent(gen->buffer, 1); + + switch(e->component_type) + { + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, "float4 "); + break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(buffer, "int4 "); + break; + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, "uint4 "); + break; + default: + vkd3d_string_buffer_printf(buffer, " ", e->component_type); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x.", e->component_type); + break; + } + + vkd3d_string_buffer_printf(buffer, "shader_in_%u ", i); + + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + vkd3d_string_buffer_printf(gen->buffer, "[[attribute(%u)]]", e->target_location); + break; + case VKD3D_SHADER_TYPE_PIXEL: + vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); + break; + } + + vkd3d_string_buffer_printf(buffer, ";\n"); + } + + vkd3d_string_buffer_printf(buffer, "};\n\n"); +} + +static void msl_generate_vertex_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) +{ + switch (e->sysval_semantic) + { + case VKD3D_SHADER_SV_POSITION: + vkd3d_string_buffer_printf(gen->buffer, "[[position]]"); + break; + case 
VKD3D_SHADER_SV_NONE: + vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled vertex shader system value %#x.", e->sysval_semantic); + break; + } +} + +static void msl_generate_pixel_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) +{ + switch (e->sysval_semantic) + { + case VKD3D_SHADER_SV_TARGET: + vkd3d_string_buffer_printf(gen->buffer, "[[color(%u)]]", e->target_location); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled pixel shader system value %#x.", e->sysval_semantic); + break; + } +} + +static void msl_generate_output_struct_declarations(struct msl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->output_signature; + enum vkd3d_shader_type type = gen->program->shader_version.type; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_out\n{\n", gen->prefix); + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); + continue; + } + + if (e->interpolation_mode != VKD3DSIM_NONE) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + continue; + } + + if(e->register_count > 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled register count %u.", e->register_count); + continue; + } + + msl_print_indent(gen->buffer, 1); + + 
switch(e->component_type) + { + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, "float4 "); + break; + case VKD3D_SHADER_COMPONENT_INT: + vkd3d_string_buffer_printf(buffer, "int4 "); + break; + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, "uint4 "); + break; + default: + vkd3d_string_buffer_printf(buffer, " ", e->component_type); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x.", e->component_type); + break; + } + + vkd3d_string_buffer_printf(buffer, "shader_out_%u ", i); + + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + msl_generate_vertex_output_element_attribute(gen, e); + break; + case VKD3D_SHADER_TYPE_PIXEL: + msl_generate_pixel_output_element_attribute(gen, e); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); + break; + } + + vkd3d_string_buffer_printf(buffer, ";\n"); + } + + vkd3d_string_buffer_printf(buffer, "};\n\n"); +} + +static void msl_generate_entrypoint_prologue(struct msl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->input_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + vkd3d_string_buffer_printf(buffer, " %s_in[%u]", gen->prefix, e->register_index); + if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); + msl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); + msl_print_write_mask(buffer, e->mask); + } + else + { + vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); + msl_compiler_error(gen, 
VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); + } + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + +static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + switch (e->sysval_semantic) + { + case VKD3D_SHADER_SV_NONE: + case VKD3D_SHADER_SV_TARGET: + case VKD3D_SHADER_SV_POSITION: + vkd3d_string_buffer_printf(buffer, " output.shader_out_%u", i); + msl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, " = %s_out", gen->prefix); + msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); + msl_print_write_mask(buffer, e->mask); + break; + default: + vkd3d_string_buffer_printf(buffer, " ", e->sysval_semantic); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); + } + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + +static void msl_generate_entrypoint(struct msl_generator *gen) +{ + enum vkd3d_shader_type type = gen->program->shader_version.type; + + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + vkd3d_string_buffer_printf(gen->buffer, "vertex "); + break; + case VKD3D_SHADER_TYPE_PIXEL: + vkd3d_string_buffer_printf(gen->buffer, "fragment "); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); + return; + } + + vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); + + if (gen->descriptor_info->descriptor_count) + { + msl_print_indent(gen->buffer, 2); 
+ /* TODO: Configurable argument buffer binding location. */ + vkd3d_string_buffer_printf(gen->buffer, + "constant vkd3d_%s_descriptors& descriptors [[buffer(0)]],\n", gen->prefix); + } + + msl_print_indent(gen->buffer, 2); + vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input [[stage_in]])\n{\n", gen->prefix); + + /* TODO: declare #maximum_register + 1 */ + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_in[%u];\n", gen->prefix, 32); + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); + + msl_generate_entrypoint_prologue(gen); + + vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); + if (gen->descriptor_info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); + vkd3d_string_buffer_printf(gen->buffer, ");\n"); + + msl_generate_entrypoint_epilogue(gen); + + vkd3d_string_buffer_printf(gen->buffer, " return output;\n}\n"); +} + +static void msl_generator_generate(struct msl_generator *gen) +{ + const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; + unsigned int i; + + MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + + vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_vec4\n{\n"); + vkd3d_string_buffer_printf(gen->buffer, " uint4 u;\n"); + vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); + vkd3d_string_buffer_printf(gen->buffer, " float4 f;\n};\n\n"); + + msl_generate_descriptor_struct_declarations(gen); + msl_generate_input_struct_declarations(gen); + msl_generate_output_struct_declarations(gen); + + vkd3d_string_buffer_printf(gen->buffer, + "void %s_main(thread vkd3d_vec4 *v, " + "thread vkd3d_vec4 *o", + gen->prefix); + if (gen->descriptor_info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); + vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); + + ++gen->indent; + + if (gen->program->temp_count) + { + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "vkd3d_vec4 r[%u];\n\n", gen->program->temp_count); + } + + for (i = 0; i < instructions->count; ++i) + { + msl_handle_instruction(gen, &instructions->elements[i]); + } + + --gen->indent; + + vkd3d_string_buffer_printf(gen->buffer, "}\n\n"); + + msl_generate_entrypoint(gen); + + if (TRACE_ON()) + vkd3d_string_buffer_trace(gen->buffer); +} + +static void msl_generator_cleanup(struct msl_generator *gen) +{ + vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); + vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); +} + +static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_shader_type type = program->shader_version.type; + + memset(gen, 0, sizeof(*gen)); + gen->program = program; + vkd3d_string_buffer_cache_init(&gen->string_buffers); + if (!(gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers))) + { + 
vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + gen->message_context = message_context; + if (!(gen->prefix = msl_get_prefix(type))) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); + return VKD3D_ERROR_INVALID_SHADER; + } + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); + gen->descriptor_info = descriptor_info; + + return VKD3D_OK; +} + +int msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct msl_generator generator; + int ret; + + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + + VKD3D_ASSERT(program->normalised_io); + VKD3D_ASSERT(program->normalised_hull_cp_io); + + if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) + return ret; + msl_generator_generate(&generator); + msl_generator_cleanup(&generator); + + return VKD3D_ERROR_INVALID_SHADER; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h index 9806614a35b..a98c8ae3df5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.h +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h @@ -60,6 +60,7 @@ struct preproc_expansion { struct preproc_buffer buffer; const struct preproc_text *text; + struct preproc_text *arg_values; /* Back-pointer to the macro, if this expansion a macro body. This is * necessary so that argument tokens can be correctly replaced. 
*/ struct preproc_macro *macro; @@ -72,7 +73,6 @@ struct preproc_macro char **arg_names; size_t arg_count; - struct preproc_text *arg_values; struct preproc_text body; }; @@ -117,6 +117,7 @@ struct preproc_ctx STATE_ARGS, } state; unsigned int paren_depth; + struct preproc_text *arg_values; } text_func, directive_func; int current_directive; diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 2b7455a5c30..d167415c356 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ @@ -29,11 +30,11 @@ #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) -static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) +static struct preproc_expansion *preproc_get_top_expansion(struct preproc_ctx *ctx) { if (!ctx->expansion_count) return NULL; - return ctx->expansion_stack[ctx->expansion_count - 1].macro; + return &ctx->expansion_stack[ctx->expansion_count - 1]; } static void update_location(struct preproc_ctx *ctx); @@ -66,7 +67,7 @@ static void update_location(struct preproc_ctx *ctx); NEWLINE \r?\n WS [ \t\r] -IDENTIFIER [A-Za-z_][A-Za-z0-9_]* +IDENTIFIER (::)?[A-Za-z_]((::)?[A-Za-z0-9_]+)* INT_SUFFIX [uUlL]{0,2} %% @@ -132,14 +133,14 @@ INT_SUFFIX [uUlL]{0,2} if (!ctx->last_was_newline) { - struct preproc_macro *macro; + struct preproc_expansion *exp; /* Stringification is only done for function-like macro bodies. * Anywhere else, we need to parse it as two separate tokens. * We could use a state for this, but yyless() is easier and cheap. 
*/ - if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) + if ((exp = preproc_get_top_expansion(ctx)) && exp->macro && exp->macro->arg_count) return T_HASHSTRING; yyless(1); @@ -258,6 +259,12 @@ static void preproc_pop_buffer(struct preproc_ctx *ctx) yy_delete_buffer(exp->buffer.lexer_buffer, ctx->scanner); + if (exp->macro) + { + for (unsigned int i = 0; i < exp->macro->arg_count; ++i) + vkd3d_string_buffer_cleanup(&exp->arg_values[i].text); + free(exp->arg_values); + } --ctx->expansion_count; TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); } @@ -310,15 +317,15 @@ static int return_token(int token, YYSTYPE *lval, const char *text) static const struct preproc_text *find_arg_expansion(struct preproc_ctx *ctx, const char *s) { - struct preproc_macro *macro; + struct preproc_expansion *exp; unsigned int i; - if ((macro = preproc_get_top_macro(ctx))) + if ((exp = preproc_get_top_expansion(ctx)) && exp->macro) { - for (i = 0; i < macro->arg_count; ++i) + for (i = 0; i < exp->macro->arg_count; ++i) { - if (!strcmp(s, macro->arg_names[i])) - return &macro->arg_values[i]; + if (!strcmp(s, exp->macro->arg_names[i])) + return &exp->arg_values[i]; } } return NULL; @@ -330,7 +337,7 @@ static void preproc_text_add(struct preproc_text *text, const char *string) } static bool preproc_push_expansion(struct preproc_ctx *ctx, - const struct preproc_text *text, struct preproc_macro *macro) + const struct preproc_text *text, struct preproc_macro *macro, struct preproc_text *arg_values) { struct preproc_expansion *exp; @@ -342,6 +349,7 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, exp->buffer.lexer_buffer = yy_scan_bytes(text->text.buffer, text->text.content_size, ctx->scanner); exp->buffer.location = text->location; exp->macro = macro; + exp->arg_values = arg_values; TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); return true; } @@ -542,7 +550,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) if ((expansion = 
find_arg_expansion(ctx, text))) { - preproc_push_expansion(ctx, expansion, NULL); + preproc_push_expansion(ctx, expansion, NULL, NULL); continue; } @@ -550,7 +558,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) { if (!macro->arg_count) { - preproc_push_expansion(ctx, &macro->body, macro); + preproc_push_expansion(ctx, &macro->body, macro, NULL); } else { @@ -616,16 +624,19 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) case STATE_IDENTIFIER: if (token == '(') { - struct preproc_text *first_arg = &func_state->macro->arg_values[0]; - unsigned int i; + struct preproc_text *arg_values; + + if (!(arg_values = calloc(func_state->macro->arg_count, sizeof(*arg_values)))) + return 0; + + for (unsigned int i = 0; i < func_state->macro->arg_count; ++i) + vkd3d_string_buffer_init(&arg_values[i].text); + arg_values[0].location = *lloc; func_state->arg_count = 0; func_state->paren_depth = 1; func_state->state = STATE_ARGS; - for (i = 0; i < func_state->macro->arg_count; ++i) - func_state->macro->arg_values[i].text.content_size = 0; - - first_arg->location = *lloc; + func_state->arg_values = arg_values; } else { @@ -649,7 +660,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) VKD3D_ASSERT(func_state->macro->arg_count); if (func_state->arg_count < func_state->macro->arg_count) - current_arg = &func_state->macro->arg_values[func_state->arg_count]; + current_arg = &func_state->arg_values[func_state->arg_count]; switch (token) { @@ -664,7 +675,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) if ((expansion = find_arg_expansion(ctx, text))) { - preproc_push_expansion(ctx, expansion, NULL); + preproc_push_expansion(ctx, expansion, NULL, NULL); continue; } @@ -700,7 +711,8 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) { if (++func_state->arg_count == func_state->macro->arg_count) { - preproc_push_expansion(ctx, &func_state->macro->body, func_state->macro); + preproc_push_expansion(ctx, &func_state->macro->body, 
func_state->macro, func_state->arg_values); } else { diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y index 366e351e3b5..c6be17bd230 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.y +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y @@ -91,7 +91,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body) { struct preproc_macro *macro; - unsigned int i; int ret; if ((macro = preproc_find_macro(ctx, name))) @@ -108,14 +107,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati macro->name = name; macro->arg_names = arg_names; macro->arg_count = arg_count; - macro->arg_values = NULL; - if (arg_count && !(macro->arg_values = vkd3d_calloc(arg_count, sizeof(*macro->arg_values)))) - { - vkd3d_free(macro); - return false; - } - for (i = 0; i < arg_count; ++i) - vkd3d_string_buffer_init(&macro->arg_values[i].text); macro->body.text = *body; macro->body.location = *body_loc; ret = rb_put(&ctx->macros, name, &macro->entry); @@ -129,12 +120,8 @@ void preproc_free_macro(struct preproc_macro *macro) vkd3d_free(macro->name); for (i = 0; i < macro->arg_count; ++i) - { - vkd3d_string_buffer_cleanup(&macro->arg_values[i].text); vkd3d_free(macro->arg_names[i]); - } vkd3d_free(macro->arg_names); - vkd3d_free(macro->arg_values); vkd3d_string_buffer_cleanup(&macro->body.text); vkd3d_free(macro); } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 49979ab2491..6a28e2cd68e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -97,15 +97,37 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co if (!(spvret = spvBinaryToText(context, spirv->code, spirv->size / sizeof(uint32_t), get_binary_to_text_options(formatting), &text, &diagnostic))) { - void *code = vkd3d_malloc(text->length); - if (code) + 
const char *p, *q, *end, *pad, *truncate; + struct vkd3d_string_buffer buffer; + size_t line_len; + + vkd3d_string_buffer_init(&buffer); + + for (p = text->str, end = p + text->length; p < end; p = q) { - memcpy(code, text->str, text->length); - out->size = text->length; - out->code = code; + if (!(q = memchr(p, '\n', end - p))) + q = end; + else + ++q; + + /* FIXME: Note that when colour output is enabled, we count colour + * escape codes towards the line length. It's possible to fix + * that, but not completely trivial. */ + for (pad = "", line_len = 100; q - p > line_len; line_len = 100 - strlen(pad)) + { + if (!(truncate = memchr(p + line_len, ' ', q - p - line_len))) + break; + vkd3d_string_buffer_printf(&buffer, "%s%.*s\n", pad, (int)(truncate - p), p); + p = truncate + 1; + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) + pad = " "; + else + pad = " "; + } + vkd3d_string_buffer_printf(&buffer, "%s%.*s", pad, (int)(q - p), p); } - else - result = VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_code_from_string_buffer(out, &buffer); } else { @@ -277,6 +299,16 @@ static void vkd3d_spirv_stream_free(struct vkd3d_spirv_stream *stream) vkd3d_spirv_stream_clear(stream); } +static void vkd3d_shader_code_from_spirv_stream(struct vkd3d_shader_code *code, struct vkd3d_spirv_stream *stream) +{ + code->code = stream->words; + code->size = stream->word_count * sizeof(*stream->words); + + stream->words = NULL; + stream->capacity = 0; + stream->word_count = 0; +} + static size_t vkd3d_spirv_stream_current_location(struct vkd3d_spirv_stream *stream) { return stream->word_count; @@ -362,6 +394,7 @@ struct vkd3d_spirv_builder uint32_t type_bool_id; uint32_t type_void_id; uint32_t scope_subgroup_id; + uint32_t numeric_type_ids[VKD3D_SHADER_COMPONENT_TYPE_COUNT][VKD3D_VEC4_SIZE]; struct vkd3d_spirv_stream debug_stream; /* debug instructions */ struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ @@ -1195,6 +1228,13 @@ static uint32_t 
vkd3d_spirv_build_op_constant_composite(struct vkd3d_spirv_build SpvOpConstantComposite, result_type, constituents, constituent_count); } +static uint32_t vkd3d_spirv_build_op_spec_constant_composite(struct vkd3d_spirv_builder *builder, + uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) +{ + return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, + SpvOpSpecConstantComposite, result_type, constituents, constituent_count); +} + static uint32_t vkd3d_spirv_get_op_constant_composite(struct vkd3d_spirv_builder *builder, uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) { @@ -1870,29 +1910,37 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, enum vkd3d_shader_component_type component_type, unsigned int component_count) { - uint32_t scalar_id; + uint32_t scalar_id, type_id; + + VKD3D_ASSERT(component_type < VKD3D_SHADER_COMPONENT_TYPE_COUNT); + VKD3D_ASSERT(1 <= component_count && component_count <= VKD3D_VEC4_SIZE); + + if ((type_id = builder->numeric_type_ids[component_type][component_count - 1])) + return type_id; if (component_count == 1) { switch (component_type) { case VKD3D_SHADER_COMPONENT_VOID: - return vkd3d_spirv_get_op_type_void(builder); + type_id = vkd3d_spirv_get_op_type_void(builder); break; case VKD3D_SHADER_COMPONENT_FLOAT: - return vkd3d_spirv_get_op_type_float(builder, 32); + type_id = vkd3d_spirv_get_op_type_float(builder, 32); break; case VKD3D_SHADER_COMPONENT_INT: case VKD3D_SHADER_COMPONENT_UINT: - return vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT); + type_id = vkd3d_spirv_get_op_type_int(builder, 32, component_type == VKD3D_SHADER_COMPONENT_INT); break; case VKD3D_SHADER_COMPONENT_BOOL: - return vkd3d_spirv_get_op_type_bool(builder); + type_id = vkd3d_spirv_get_op_type_bool(builder); break; case 
VKD3D_SHADER_COMPONENT_DOUBLE: - return vkd3d_spirv_get_op_type_float(builder, 64); + type_id = vkd3d_spirv_get_op_type_float(builder, 64); + break; case VKD3D_SHADER_COMPONENT_UINT64: - return vkd3d_spirv_get_op_type_int(builder, 64, 0); + type_id = vkd3d_spirv_get_op_type_int(builder, 64, 0); + break; default: FIXME("Unhandled component type %#x.\n", component_type); return 0; @@ -1902,46 +1950,21 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, { VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); + type_id = vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); } + + builder->numeric_type_ids[component_type][component_count - 1] = type_id; + + return type_id; } static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, enum vkd3d_data_type data_type, unsigned int component_count) { - uint32_t scalar_id; + enum vkd3d_shader_component_type component_type; - if (component_count == 1) - { - switch (data_type) - { - case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_SNORM: - case VKD3D_DATA_UNORM: - return vkd3d_spirv_get_op_type_float(builder, 32); - break; - case VKD3D_DATA_INT: - case VKD3D_DATA_UINT: - case VKD3D_DATA_UINT16: /* Minimum precision. 
TODO: native 16-bit */ - return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); - break; - case VKD3D_DATA_DOUBLE: - return vkd3d_spirv_get_op_type_float(builder, 64); - case VKD3D_DATA_UINT64: - return vkd3d_spirv_get_op_type_int(builder, 64, 0); - case VKD3D_DATA_BOOL: - return vkd3d_spirv_get_op_type_bool(builder); - default: - FIXME("Unhandled data type %#x.\n", data_type); - return 0; - } - } - else - { - scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); - return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); - } + component_type = vkd3d_component_type_from_data_type(data_type); + return vkd3d_spirv_get_type_id(builder, component_type, component_count); } static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) @@ -1996,9 +2019,7 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, { uint64_t capability_mask = builder->capability_mask; struct vkd3d_spirv_stream stream; - uint32_t *code; unsigned int i; - size_t size; vkd3d_spirv_stream_init(&stream); @@ -2053,26 +2074,20 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, if (builder->invocation_count) vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1); - vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream); - vkd3d_spirv_stream_append(&stream, &builder->debug_stream); - vkd3d_spirv_stream_append(&stream, &builder->annotation_stream); - vkd3d_spirv_stream_append(&stream, &builder->global_stream); - vkd3d_spirv_stream_append(&stream, &builder->function_stream); - - if (!(code = vkd3d_calloc(stream.word_count, sizeof(*code)))) + if (!vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->debug_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->annotation_stream) 
+ || !vkd3d_spirv_stream_append(&stream, &builder->global_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->function_stream)) { vkd3d_spirv_stream_free(&stream); return false; } - size = stream.word_count * sizeof(*code); - memcpy(code, stream.words, size); + vkd3d_shader_code_from_spirv_stream(spirv, &stream); vkd3d_spirv_stream_free(&stream); - spirv->code = code; - spirv->size = size; - return true; } @@ -2647,8 +2662,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); - compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count - && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY; compiler->shader_interface = *shader_interface; if (shader_interface->push_constant_buffer_count) @@ -2675,6 +2688,11 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p } } + if (compiler->shader_type == VKD3D_SHADER_TYPE_VERTEX) + compiler->emit_point_size = true; + else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) + compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count; + compiler->scan_descriptor_info = scan_descriptor_info; compiler->phase = VKD3DSIH_INVALID; @@ -3252,18 +3270,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * vkd3d_spirv_build_op_name(builder, id, "%s", debug_name); } -static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, - struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, ptr_type_id; - - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - ptr_type_id = 
vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); - return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); -} - static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, enum vkd3d_shader_component_type component_type, unsigned int component_count, @@ -3273,10 +3279,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil uint32_t type_id, length_id, ptr_type_id; unsigned int i; - if (!length_count) - return spirv_compiler_emit_variable(compiler, - stream, storage_class, component_type, component_count); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); for (i = 0; i < length_count; ++i) { @@ -3290,6 +3292,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } +static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, + struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, + enum vkd3d_shader_component_type component_type, unsigned int component_count) +{ + return spirv_compiler_emit_array_variable(compiler, stream, storage_class, + component_type, component_count, NULL, 0); +} + static const struct vkd3d_spec_constant_info { enum vkd3d_shader_parameter_name name; @@ -3316,8 +3326,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_ return NULL; } -static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *compiler) +static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *compiler, unsigned int count) { + uint32_t ret; + if (!compiler->current_spec_constant_id) { unsigned int i, id = 0; @@ -3327,28 +3339,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com const struct vkd3d_shader_parameter1 *current = 
&compiler->program->parameters[i]; if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - id = max(current->u.specialization_constant.id + 1, id); + { + switch (current->data_type) + { + case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4: + id = max(current->u.specialization_constant.id + 4, id); + break; + + default: + id = max(current->u.specialization_constant.id + 1, id); + break; + } + } } compiler->current_spec_constant_id = id; } - return compiler->current_spec_constant_id++; + ret = compiler->current_spec_constant_id; + compiler->current_spec_constant_id += count; + return ret; } static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) + enum vkd3d_shader_parameter_name name, uint32_t spec_id, + enum vkd3d_data_type type, unsigned int component_count) { + uint32_t scalar_type_id, vector_type_id, id, default_value, components[4]; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_spec_constant_info *info; - uint32_t type_id, id, default_value; info = get_spec_constant_info(name); default_value = info ? 
info->default_value : 0; - type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); - id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); + scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); + + for (unsigned int i = 0; i < component_count; ++i) + { + components[i] = vkd3d_spirv_build_op_spec_constant(builder, scalar_type_id, default_value); + vkd3d_spirv_build_op_decorate1(builder, components[i], SpvDecorationSpecId, spec_id + i); + } + + if (component_count == 1) + id = components[0]; + else + id = vkd3d_spirv_build_op_spec_constant_composite(builder, vector_type_id, components, component_count); if (info) vkd3d_spirv_build_op_name(builder, id, "%s", info->debug_name); @@ -3365,7 +3401,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile } static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) + enum vkd3d_shader_parameter_name name, uint32_t spec_id, + enum vkd3d_data_type type, unsigned int component_count) { unsigned int i; @@ -3375,17 +3412,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler return compiler->spec_constants[i].id; } - return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); + return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type, component_count); } static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, - const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) + const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type, unsigned int component_count) { struct vkd3d_spirv_builder 
*builder = &compiler->spirv_builder; unsigned int index = parameter - compiler->program->parameters; uint32_t type_id, ptr_id, ptr_type_id; - type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, compiler->spirv_parameter_info[index].buffer_id, @@ -3393,48 +3430,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } +static const struct +{ + enum vkd3d_data_type type; + unsigned int component_count; +} +parameter_data_type_map[] = +{ + [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT, 1}, + [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT, 1}, + [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4] = {VKD3D_DATA_FLOAT, 4}, +}; + static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type) + enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type, unsigned int component_count) { const struct vkd3d_shader_parameter1 *parameter; - static const struct - { - enum vkd3d_data_type type; - } - type_map[] = - { - [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT}, - [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT}, - }; - if (!(parameter = vsir_program_get_parameter(compiler->program, name))) { WARN("Unresolved shader parameter %#x.\n", name); goto default_parameter; } - if (type_map[parameter->data_type].type != type) - ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type); + if (parameter_data_type_map[parameter->data_type].type != type + || 
parameter_data_type_map[parameter->data_type].component_count != component_count) + ERR("Expected type %#x, count %u for parameter %#x, got %#x.\n", + type, component_count, name, parameter->data_type); if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { - if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) - return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); - else - return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); - } + return spirv_compiler_get_constant(compiler, vkd3d_component_type_from_data_type(type), + component_count, (const uint32_t *)&parameter->u.immediate_constant); if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); + return spirv_compiler_get_spec_constant(compiler, name, + parameter->u.specialization_constant.id, type, component_count); if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) - return spirv_compiler_get_buffer_parameter(compiler, parameter, type); + return spirv_compiler_get_buffer_parameter(compiler, parameter, type, component_count); FIXME("Unhandled parameter type %#x.\n", parameter->type); default_parameter: return spirv_compiler_get_spec_constant(compiler, - name, spirv_compiler_alloc_spec_constant_id(compiler), type); + name, spirv_compiler_alloc_spec_constant_id(compiler, component_count), type, component_count); } static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, @@ -4210,7 +4248,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, else if (reg->type == VKD3DSPR_UNDEF) return spirv_compiler_emit_load_undef(compiler, reg, write_mask); else if (reg->type == VKD3DSPR_PARAMETER) - return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type); + return 
spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, + reg->data_type, reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1); component_count = vsir_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); @@ -4500,9 +4539,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, uint32_t val_id) { - VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); - if (dst->modifiers & VKD3DSPDM_SATURATE) + uint32_t modifiers = dst->modifiers; + + /* It is always legitimate to ignore _pp. */ + modifiers &= ~VKD3DSPDM_PARTIALPRECISION; + + if (modifiers & VKD3DSPDM_SATURATE) + { val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); + modifiers &= ~VKD3DSPDM_SATURATE; + } + + if (dst->modifiers & VKD3DSPDM_MSAMPCENTROID) + { + FIXME("Ignoring _centroid modifier.\n"); + modifiers &= ~VKD3DSPDM_MSAMPCENTROID; + } + + VKD3D_ASSERT(!modifiers); spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, val_id); } @@ -4809,6 +4863,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = { VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, }; +static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = +{ + VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize, +}; static const struct { enum vkd3d_shader_register_type reg_type; @@ -5398,7 +5456,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); VKD3D_ASSERT(reg->idx_count < 2); - if (!(builtin = get_spirv_builtin_for_register(reg->type))) + if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + { + builtin = &vkd3d_output_point_size_builtin; + } + else if (!(builtin = get_spirv_builtin_for_register(reg->type))) { 
FIXME("Unhandled register %#x.\n", reg->type); return; @@ -5451,7 +5513,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, const struct shader_signature *shader_signature; const struct vkd3d_spirv_builtin *builtin; enum vkd3d_shader_sysval_semantic sysval; - uint32_t write_mask, reg_write_mask; + uint32_t write_mask; bool use_private_variable = false; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; @@ -5502,7 +5564,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, use_private_variable = true; } - reg_write_mask = write_mask >> component_idx; vkd3d_symbol_make_io(&reg_symbol, reg_type, element_idx); if (rb_get(&compiler->symbol_table, &reg_symbol)) @@ -5580,7 +5641,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); + use_private_variable ? 
VKD3DSP_WRITEMASK_ALL : write_mask); reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); @@ -5591,7 +5652,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, if (use_private_variable) { compiler->private_output_variable[element_idx] = var_id; - compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; + compiler->private_output_variable_write_mask[element_idx] |= write_mask >> component_idx; if (!compiler->epilogue_function_id) compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); } @@ -6120,12 +6181,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, - bool is_uav, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) + const struct vkd3d_shader_descriptor_info1 *descriptor, bool is_uav_counter, + struct vkd3d_descriptor_variable_info *var_info) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_descriptor_binding_address binding_address; struct vkd3d_shader_descriptor_binding binding; - const struct vkd3d_shader_descriptor_info1 *d; uint32_t array_type_id, ptr_type_id, var_id; bool write_only = false, coherent = false; struct vkd3d_symbol symbol; @@ -6135,12 +6196,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * resource_type, is_uav_counter, &binding_address); var_info->binding_base_idx = binding_address.binding_base_idx; - if (is_uav) + if (descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && !is_uav_counter) { - d = spirv_compiler_get_descriptor_info(compiler, 
VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + write_only = !(descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); /* ROVs are implicitly globally coherent. */ - coherent = d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); + coherent = descriptor->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); } if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u @@ -6194,11 +6254,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * } static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) + const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; const SpvStorageClass storage_class = SpvStorageClassUniform; + unsigned int size_in_bytes = descriptor->buffer_size; struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_shader_register reg; @@ -6206,7 +6267,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, unsigned int size; vsir_register_init(&reg, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3); - reg.idx[0].offset = register_id; + reg.idx[0].offset = descriptor->register_id; reg.idx[1].offset = range->first; reg.idx[2].offset = range->last; @@ -6239,7 +6300,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, - &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info); + &reg, range, 
VKD3D_SHADER_RESOURCE_BUFFER, descriptor, false, &var_info); vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, @@ -6275,7 +6336,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi } static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, unsigned int register_id) + const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6285,7 +6346,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi uint32_t type_id, var_id; vsir_register_init(&reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); - reg.idx[0].offset = register_id; + reg.idx[0].offset = descriptor->register_id; vkd3d_symbol_make_sampler(&reg_symbol, &reg); reg_symbol.info.sampler.range = *range; @@ -6295,8 +6356,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi return; type_id = vkd3d_spirv_get_op_type_sampler(builder); - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, - range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, + &reg, range, VKD3D_SHADER_RESOURCE_NONE, descriptor, false, &var_info); vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, @@ -6346,7 +6407,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, - bool 
raw_structured, uint32_t depth) + bool raw_structured) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_descriptor_info1 *d; @@ -6369,7 +6430,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1); return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, - depth, resource_type_info->arrayed, resource_type_info->ms, + 2, resource_type_info->arrayed, resource_type_info->ms, reg->type == VKD3DSPR_UAV ? 2 : 1, format); } @@ -6384,18 +6445,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi const struct vkd3d_shader_combined_resource_sampler *current; uint32_t image_type_id, type_id, ptr_type_id, var_id; enum vkd3d_shader_binding_flag resource_type_flag; - const struct vkd3d_shader_descriptor_info1 *d; struct vkd3d_symbol symbol; unsigned int i; - bool depth; resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; for (i = 0; i < shader_interface->combined_sampler_count; ++i) { - struct vkd3d_shader_register_range sampler_range; - current = &shader_interface->combined_samplers[i]; if (current->resource_space != resource_range->space || current->resource_index != resource_range->first) @@ -6417,16 +6474,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi current->sampler_space, current->binding.count); } - sampler_range.space = current->sampler_space; - sampler_range.first = current->sampler_index; - sampler_range.last = current->sampler_index; - d = spirv_compiler_get_descriptor_info(compiler, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler_range); - depth = current->sampler_index != VKD3D_SHADER_DUMMY_SAMPLER_INDEX - && (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE); - image_type_id = spirv_compiler_get_image_type_id(compiler, resource, resource_range, - resource_type_info, sampled_type, structure_stride || raw, depth); + resource_type_info, sampled_type, structure_stride || raw); type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image_type_id); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); @@ -6461,21 +6510,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi } static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, unsigned int register_id, - unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, - enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) + const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { + bool raw = descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; + enum vkd3d_shader_resource_type resource_type = descriptor->resource_type; struct 
vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; + bool is_uav = descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + unsigned int structure_stride = descriptor->structure_stride / 4; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; SpvStorageClass storage_class = SpvStorageClassUniformConstant; uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; const struct vkd3d_spirv_resource_type *resource_type_info; + unsigned int sample_count = descriptor->sample_count; enum vkd3d_shader_component_type sampled_type; struct vkd3d_symbol resource_symbol; struct vkd3d_shader_register reg; vsir_register_init(&reg, is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_FLOAT, 1); - reg.idx[0].offset = register_id; + reg.idx[0].offset = descriptor->register_id; if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; @@ -6489,7 +6541,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp return; } - sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); + sampled_type = vkd3d_component_type_from_resource_data_type(descriptor->resource_data_type); if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { @@ -6517,19 +6569,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp else { type_id = spirv_compiler_get_image_type_id(compiler, &reg, range, - resource_type_info, sampled_type, structure_stride || raw, 0); + resource_type_info, sampled_type, structure_stride || raw); } - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, - range, resource_type, is_uav, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, + type_id, &reg, range, resource_type, descriptor, false, &var_info); if (is_uav) { - const struct vkd3d_shader_descriptor_info1 *d; - - d = 
spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - - if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) + if (descriptor->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) { if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, @@ -6543,7 +6591,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp compiler->use_invocation_interlock = true; } - if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) + if (descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) { VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ @@ -6571,7 +6619,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, - type_id, &reg, range, resource_type, false, true, &counter_var_info); + type_id, &reg, range, resource_type, descriptor, true, &counter_var_info); } } @@ -6709,7 +6757,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) { - static const struct vkd3d_spirv_builtin point_size = {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize}; + if (compiler->program->has_point_size) + return; /* Set the point size. Point sprites are not supported in d3d10+, but * point primitives can still be used with e.g. stream output.
Vulkan @@ -6723,7 +6772,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) || compiler->write_tess_geom_point_size) { vkd3d_spirv_build_op_store(&compiler->spirv_builder, - spirv_compiler_emit_builtin_variable(compiler, &point_size, SpvStorageClassOutput, 0), + spirv_compiler_emit_builtin_variable(compiler, + &vkd3d_output_point_size_builtin, SpvStorageClassOutput, 0), spirv_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone); } } @@ -6845,10 +6895,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler * spirv_compiler_emit_execution_mode(compiler, mode, NULL, 0); } -static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compiler, + const struct vsir_thread_group_size *group_size) { - const struct vkd3d_shader_thread_group_size *group_size = &instruction->declaration.thread_group_size; const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z}; spirv_compiler_emit_execution_mode(compiler, @@ -7391,7 +7440,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, uint32_t components[VKD3D_VEC4_SIZE]; if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA - || dst->modifiers || src->modifiers) + || src->reg.type == VKD3DSPR_PARAMETER || dst->modifiers || src->modifiers) goto general_implementation; spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); @@ -8433,11 +8482,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t sampler_var_id, sampler_id, sampled_image_type_id; const struct vkd3d_symbol *symbol = NULL; - bool load, sampled, depth_comparison; + bool load, sampled; load = !(flags & VKD3D_IMAGE_FLAG_NO_LOAD); sampled = flags & 
VKD3D_IMAGE_FLAG_SAMPLED; - depth_comparison = flags & VKD3D_IMAGE_FLAG_DEPTH; if (resource_reg->type == VKD3DSPR_RESOURCE) symbol = spirv_compiler_find_combined_sampler(compiler, resource_reg, sampler_reg); @@ -8491,7 +8539,7 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg, &symbol->info.resource.range, image->resource_type_info, - image->sampled_type, image->structure_stride || image->raw, depth_comparison); + image->sampled_type, image->structure_stride || image->raw); if (sampled) { @@ -9569,7 +9617,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co if (src->reg.type == VKD3DSPR_RASTERIZER) { val_id = spirv_compiler_emit_shader_parameter(compiler, - VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT); + VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT, 1); } else { @@ -10183,9 +10231,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, spirv_compiler_emit_tessellator_partitioning(compiler, instruction->declaration.tessellator_partitioning); break; - case VKD3DSIH_DCL_THREAD_GROUP: - spirv_compiler_emit_dcl_thread_group(compiler, instruction); - break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -10506,7 +10551,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_RAW: case VKD3DSIH_DCL_UAV_STRUCTURED: - case VKD3DSIH_DCL_UAV_TYPED: case VKD3DSIH_HS_DECLS: case VKD3DSIH_NOP: /* nothing to do */ @@ -10543,6 +10587,15 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) else spirv_compiler_emit_input(compiler, VKD3DSPR_PATCHCONST, i); } + + if (compiler->program->has_point_size) + { + struct vkd3d_shader_dst_param dst; + + vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 
1); + dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + spirv_compiler_emit_output_register(compiler, &dst); + } } static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) @@ -10564,23 +10617,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c switch (descriptor->type) { case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: - spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); + spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: - spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); + spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: - spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, - descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, - descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); - break; - case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: - spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, - descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, - descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); + spirv_compiler_emit_resource_declaration(compiler, &range, descriptor); break; default: @@ -10600,10 +10646,13 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct enum vkd3d_result result = VKD3D_OK; unsigned int i, max_element_count; - if ((result = vsir_program_normalise(program, compiler->config_flags, + if ((result = vsir_program_transform(program, compiler->config_flags, compile_info, compiler->message_context)) < 0) return result; + VKD3D_ASSERT(program->normalised_io); + VKD3D_ASSERT(program->normalised_hull_cp_io); + 
max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -10612,6 +10661,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct spirv_compiler_emit_temps(compiler, program->temp_count); if (program->ssa_count) spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); + if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) + spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); spirv_compiler_emit_descriptor_declarations(compiler); @@ -10624,7 +10675,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct { uint32_t type_id, struct_id, ptr_type_id, var_id; - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = vkd3d_spirv_get_type_id(builder, + vkd3d_component_type_from_data_type(parameter_data_type_map[parameter->data_type].type), + parameter_data_type_map[parameter->data_type].component_count); struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 84f641cc316..848e78a34d3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -23,6 +23,7 @@ #include "hlsl.h" #include "vkd3d_shader_private.h" +#include "d3dcommon.h" #define SM4_MAX_SRC_COUNT 6 #define SM4_MAX_DST_COUNT 2 @@ -616,6 +617,47 @@ enum vkd3d_sm4_shader_data_type VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, }; +enum vkd3d_sm4_stat_field +{ + VKD3D_STAT_UNUSED = 0, + VKD3D_STAT_INSTR_COUNT, + VKD3D_STAT_MOV, + VKD3D_STAT_MOVC, + VKD3D_STAT_CONV, + VKD3D_STAT_FLOAT, + VKD3D_STAT_INT, + VKD3D_STAT_UINT, + VKD3D_STAT_EMIT, + VKD3D_STAT_CUT, + VKD3D_STAT_SAMPLE, + 
VKD3D_STAT_SAMPLE_C, + VKD3D_STAT_SAMPLE_GRAD, + VKD3D_STAT_SAMPLE_BIAS, + VKD3D_STAT_LOAD, + VKD3D_STAT_STORE, + VKD3D_STAT_DCL_VERTICES_OUT, + VKD3D_STAT_DCL_INPUT_PRIMITIVE, + VKD3D_STAT_DCL_OUTPUT_TOPOLOGY, + VKD3D_STAT_DCL_GS_INSTANCES, + VKD3D_STAT_BITWISE, + VKD3D_STAT_ATOMIC, + VKD3D_STAT_TESS_DOMAIN, + VKD3D_STAT_TESS_PARTITIONING, + VKD3D_STAT_TESS_OUTPUT_PRIMITIVE, + VKD3D_STAT_TESS_CONTROL_POINT_COUNT, + VKD3D_STAT_BARRIER, + VKD3D_STAT_LOD, + VKD3D_STAT_GATHER, + VKD3D_STAT_TEMPS, + VKD3D_STAT_COUNT, +}; + +struct vkd3d_sm4_stat_field_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_sm4_stat_field field; +}; + struct sm4_index_range { unsigned int index; @@ -634,6 +676,7 @@ struct vkd3d_sm4_lookup_tables const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; + const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; }; struct vkd3d_shader_sm4_parser @@ -853,7 +896,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; } - reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; + reg_data_type = VKD3D_DATA_UNUSED; shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); @@ -873,7 +916,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u } } - if (reg_data_type == VKD3D_DATA_UAV) + if (opcode != VKD3D_SM4_OP_DCL_RESOURCE) ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); @@ -915,7 +958,7 @@ static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, ui ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) FIXME("Unhandled sampler mode %#x.\n", ins->flags); - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &ins->declaration.sampler.src); shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); } @@ -1115,7 +1158,18 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u struct signature_element *e = vsir_signature_find_element_for_reg( &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - e->interpolation_mode = ins->flags; + if (!e) + { + WARN("No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL, + "No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, 
dst->write_mask); + } + else + { + e->interpolation_mode = ins->flags; + } } } @@ -1130,7 +1184,18 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in struct signature_element *e = vsir_signature_find_element_for_reg( &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - e->interpolation_mode = ins->flags; + if (!e) + { + WARN("No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL, + "No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + } + else + { + e->interpolation_mode = ins->flags; + } } ins->declaration.register_semantic.sysval_semantic = *tokens; } @@ -1224,11 +1289,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio } static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) { + struct vsir_program *program = sm4->p.program; + ins->declaration.thread_group_size.x = *tokens++; ins->declaration.thread_group_size.y = *tokens++; ins->declaration.thread_group_size.z = *tokens++; + program->thread_group_size = ins->declaration.thread_group_size; } static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, @@ -1237,7 +1305,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; const uint32_t *end = &tokens[token_count]; - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, 
&resource->resource.reg); + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); @@ -1249,7 +1317,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction * struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; const uint32_t *end = &tokens[token_count]; - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; resource->byte_stride = *tokens++; @@ -1286,7 +1354,7 @@ static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; const uint32_t *end = &tokens[token_count]; - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UNUSED, &resource->resource.reg); shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); resource->byte_stride = *tokens++; if (resource->byte_stride % 4) @@ -1300,7 +1368,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; const uint32_t *end = &tokens[token_count]; - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); + shader_sm4_read_dst_param(priv, &tokens, 
end, VKD3D_DATA_UNUSED, &resource->resource.reg); shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); } @@ -1330,11 +1398,21 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, }; -struct tpf_writer +struct sm4_stat { + uint32_t fields[VKD3D_STAT_COUNT]; +}; + +struct tpf_compiler +{ + /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */ struct hlsl_ctx *ctx; - struct vkd3d_bytecode_buffer *buffer; + struct vsir_program *program; struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; + + struct vkd3d_bytecode_buffer *buffer; + struct dxbc_writer dxbc; }; static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) @@ -1400,8 +1478,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, - {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, - {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, + {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "i*"}, + {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "i*i"}, {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, {VKD3D_SM4_OP_LT, VKD3DSIH_LTO, "u", "ff"}, @@ -1417,7 +1495,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, - {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", shader_sm4_read_conditional_op}, @@ -1426,12 +1504,12 @@ static void 
init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, - {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, - {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "f**"}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "f**f"}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "f**f"}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "f**f"}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "f**ff"}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "f**f"}, {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, @@ -1480,10 +1558,10 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) shader_sm4_read_dcl_indexable_temp}, {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", shader_sm4_read_dcl_global_flags}, - {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, - {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, - {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, + {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "f**"}, + {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "f**"}, + {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "*u"}, + {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "*"}, {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, @@ -1492,14 +1570,14 
@@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", shader_sm5_read_fcall}, - {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, + {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "*"}, {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, - {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, + {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "f**f"}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fi**"}, + {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fi**f"}, {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, @@ -1551,33 +1629,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) shader_sm5_read_dcl_resource_raw}, {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", shader_sm5_read_dcl_resource_structured}, - {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, - {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, - {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, - {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, - {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, - {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, - {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, 
- {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "i*"}, + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "*", "iu"}, + {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "i*"}, + {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "*", "uu"}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "ii*"}, + {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "*", "iiu"}, + {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "*", "iu"}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "*", "iu"}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "*", "iu"}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "*", "iuu"}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "*", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "*", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMIN, 
VKD3DSIH_ATOMIC_IMIN, "*", "ii"}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "*", "iu"}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "*", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "*"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "*"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "u*", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "u*", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "u*", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "u*", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "u*", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "u*", "iuu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "i*", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "i*", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "u*", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "u*", "iu"}, {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", shader_sm5_read_sync}, {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, @@ -1604,21 +1682,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, - {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, - {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, - {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, - {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, + {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "f**"}, + {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "f**f"}, + {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fi**"}, + {VKD3D_SM5_OP_GATHER4_PO_C_S, 
VKD3DSIH_GATHER4_PO_C_S, "fu", "fi**f"}, + {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "i*"}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "i*i"}, {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, - {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, - {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, - {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "ii*"}, + {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "f**f"}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "f**f"}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "f**f"}, + {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "f**ff"}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "f**fff"}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "f**ff"}, {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, }; @@ -1662,6 +1740,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, }; + static const struct vkd3d_sm4_stat_field_info stat_field_table[] = + { + {VKD3D_SM4_OP_MOV, VKD3D_STAT_MOV}, + {VKD3D_SM4_OP_MOVC, VKD3D_STAT_MOVC}, + {VKD3D_SM5_OP_DMOV, VKD3D_STAT_MOV}, + {VKD3D_SM5_OP_DMOVC, VKD3D_STAT_MOVC}, + + {VKD3D_SM4_OP_ITOF, VKD3D_STAT_CONV}, + {VKD3D_SM4_OP_FTOI, VKD3D_STAT_CONV}, + {VKD3D_SM4_OP_FTOU, VKD3D_STAT_CONV}, + {VKD3D_SM4_OP_UTOF, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_DTOU, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_UTOD, 
VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_DTOF, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_FTOD, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_DTOI, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_ITOD, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_F32TOF16, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_F16TOF32, VKD3D_STAT_CONV}, + + {VKD3D_SM4_OP_ADD, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DIV, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DP2, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DP3, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DP4, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_EQ, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_EXP, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_FRC, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_GE, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_LT, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MAD, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MIN, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MAX, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MUL, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_NE, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_NE, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_NI, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_PI, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_Z, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_RSQ, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_SQRT, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_SINCOS, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_RCP, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DADD, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DMAX, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DMIN, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DMUL, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DEQ, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DGE, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DLT, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DNE, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DDIV, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DFMA, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DRCP, VKD3D_STAT_FLOAT}, + + {VKD3D_SM4_OP_IADD, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IEQ, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IGE, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ILT, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMAD, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMAX, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMIN, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMUL, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_INE, 
VKD3D_STAT_INT}, + {VKD3D_SM4_OP_INEG, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ISHL, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ISHR, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ITOF, VKD3D_STAT_INT}, + + {VKD3D_SM4_OP_UDIV, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_ULT, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UGE, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UMUL, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UMAX, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UMIN, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_USHR, VKD3D_STAT_UINT}, + + {VKD3D_SM4_OP_EMIT, VKD3D_STAT_EMIT}, + {VKD3D_SM4_OP_CUT, VKD3D_STAT_CUT}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3D_STAT_EMIT}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3D_STAT_CUT}, + + {VKD3D_SM4_OP_SAMPLE, VKD3D_STAT_SAMPLE}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3D_STAT_SAMPLE}, + {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3D_STAT_SAMPLE}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3D_STAT_SAMPLE}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3D_STAT_SAMPLE_GRAD}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3D_STAT_SAMPLE_GRAD}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3D_STAT_SAMPLE_BIAS}, + {VKD3D_SM4_OP_GATHER4, VKD3D_STAT_GATHER}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3D_STAT_GATHER}, + {VKD3D_SM4_OP_LOD, VKD3D_STAT_LOD}, + + {VKD3D_SM4_OP_LD, VKD3D_STAT_LOAD}, + {VKD3D_SM4_OP_LD2DMS, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_RAW, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_RAW_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3D_STAT_LOAD}, + + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3D_STAT_STORE}, + {VKD3D_SM5_OP_STORE_RAW, VKD3D_STAT_STORE}, + {VKD3D_SM5_OP_STORE_STRUCTURED,VKD3D_STAT_STORE}, + + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3D_STAT_DCL_VERTICES_OUT}, + 
{VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3D_STAT_DCL_INPUT_PRIMITIVE}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3D_STAT_DCL_OUTPUT_TOPOLOGY}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3D_STAT_DCL_GS_INSTANCES}, + + {VKD3D_SM4_OP_AND, VKD3D_STAT_BITWISE}, + {VKD3D_SM4_OP_NOT, VKD3D_STAT_BITWISE}, + {VKD3D_SM4_OP_OR, VKD3D_STAT_BITWISE}, + {VKD3D_SM4_OP_XOR, VKD3D_STAT_BITWISE}, + + {VKD3D_SM5_OP_ATOMIC_AND, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3D_STAT_ATOMIC}, + + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3D_STAT_TESS_DOMAIN}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3D_STAT_TESS_PARTITIONING}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3D_STAT_TESS_OUTPUT_PRIMITIVE}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3D_STAT_TESS_CONTROL_POINT_COUNT}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3D_STAT_TESS_CONTROL_POINT_COUNT}, + + {VKD3D_SM5_OP_SYNC, VKD3D_STAT_BARRIER}, + + {VKD3D_SM4_OP_DCL_TEMPS, VKD3D_STAT_TEMPS}, + }; + memset(lookup, 0, sizeof(*lookup)); for (i = 
0; i < ARRAY_SIZE(opcode_table); ++i) @@ -1678,13 +1911,13 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) lookup->register_type_info_from_sm4[info->sm4_type] = info; lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; } -} -static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - tpf->ctx = ctx; - tpf->buffer = buffer; - init_sm4_lookup_tables(&tpf->lookup); + for (i = 0; i < ARRAY_SIZE(stat_field_table); ++i) + { + const struct vkd3d_sm4_stat_field_info *info = &stat_field_table[i]; + + lookup->stat_field_from_sm4[info->opcode] = info; + } } static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( @@ -1721,6 +1954,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( return register_type_info->default_src_swizzle_type; } +static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) +{ + const struct vkd3d_sm4_stat_field_info *field_info; + + if (sm4_opcode >= VKD3D_SM4_OP_COUNT || !(field_info = lookup->stat_field_from_sm4[sm4_opcode])) + return VKD3D_STAT_UNUSED; + return field_info->field; +} + static enum vkd3d_data_type map_data_type(char t) { switch (t) @@ -1735,12 +1978,8 @@ static enum vkd3d_data_type map_data_type(char t) return VKD3D_DATA_UINT; case 'O': return VKD3D_DATA_OPAQUE; - case 'R': - return VKD3D_DATA_RESOURCE; - case 'S': - return VKD3D_DATA_SAMPLER; - case 'U': - return VKD3D_DATA_UAV; + case '*': + return VKD3D_DATA_UNUSED; default: ERR("Invalid data type '%c'.\n", t); return VKD3D_DATA_FLOAT; @@ -2553,7 +2792,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro version.minor = VKD3D_SM4_VERSION_MINOR(version_token); /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) + if (!vsir_program_init(program, compile_info, + &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; @@ -2670,6 +2910,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) uninvert_used_masks(&program->patch_constant_signature); + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: + break; + + default: + if (program->patch_constant_signature.element_count != 0) + { + WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); + shader_signature_cleanup(&program->patch_constant_signature); + } + break; + } + if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, sm4.input_register_masks, "Input") || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, @@ -2706,9 +2961,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con && !sm4.has_control_point_phase && !sm4.p.failed) shader_sm4_validate_default_phase_index_ranges(&sm4); - if (!sm4.p.failed) - vkd3d_shader_parser_validate(&sm4.p, config_flags); - if (sm4.p.failed) { WARN("Failed to parse shader.\n"); @@ -2716,10 +2968,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_ERROR_INVALID_SHADER; } + if ((ret = vkd3d_shader_parser_validate(&sm4.p, config_flags)) < 0) + { + WARN("Failed to validate shader after parsing, ret %d.\n", ret); + + if (TRACE_ON()) + vsir_program_trace(program); + + vsir_program_cleanup(program); + return ret; + } + return VKD3D_OK; } -static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); +static void write_sm4_block(const struct tpf_compiler 
*tpf, const struct hlsl_block *block); static bool type_is_integer(const struct hlsl_type *type) { @@ -2735,8 +2998,8 @@ static bool type_is_integer(const struct hlsl_type *type) } } -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_shader_register_type *type, bool *has_idx) +bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { unsigned int i; @@ -2756,6 +3019,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, + {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, + /* Put sv_target in this table, instead of letting it fall through to * default varying allocation, so that the register index matches the * usage index. 
*/ @@ -2768,9 +3034,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem for (i = 0; i < ARRAY_SIZE(register_table); ++i) { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) + && version->type == register_table[i].shader_type) { if (type) *type = register_table[i].type; @@ -2782,8 +3048,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3D_NAME *usage) +static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, + enum vkd3d_tessellator_domain domain, uint32_t index) +{ + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + if (index == 0) + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; + else if (index == 1) + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; + else + return false; + return true; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; + return index < 3; + + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; + return index < 4; + + default: + vkd3d_unreachable(); + } +} + +static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, + enum vkd3d_tessellator_domain domain, uint32_t index) +{ + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + return false; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; + return index == 0; + + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; + return index < 2; + + default: + vkd3d_unreachable(); + } +} + +bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const 
struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, + const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) { unsigned int i; @@ -2792,7 +3107,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant const char *name; bool output; enum vkd3d_shader_type shader_type; - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; } semantics[] = { @@ -2800,46 +3115,79 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_PRIMITIVE_ID}, - {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, - {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_VIEWPORT_ARRAY_INDEX}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, 
D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, - {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_INSTANCE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VIEWPORT_ARRAY_INDEX}, + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + + {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + + {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, + + {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, + {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, + {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, + {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + {"sv_sampleindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_SAMPLE_INDEX}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, + 
{"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, + {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_COVERAGE}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_NONE}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VERTEX_ID}, + {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_INSTANCE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, + {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, + {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, }; - bool needs_compat_mapping = ascii_strncasecmp(semantic->name, "sv_", 3); + bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3); + + if (is_patch_constant_func) + { + if (output) + { + if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) + return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); + if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) + return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); + if (!ascii_strcasecmp(semantic_name, "sv_position")) + { + *sysval_semantic = VKD3D_SHADER_SV_NONE; + return true; + } + } + else + { + if (!ascii_strcasecmp(semantic_name, "sv_primitiveid") + || !ascii_strcasecmp(semantic_name, "sv_position")) + { + *sysval_semantic = ~0u; + return true; + } + return false; + } + } for (i = 0; i < ARRAY_SIZE(semantics); ++i) { - if (!ascii_strcasecmp(semantic->name, semantics[i].name) + if (!ascii_strcasecmp(semantic_name, semantics[i].name) && output == semantics[i].output - && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) - && ctx->profile->type == semantics[i].shader_type) + && (semantic_compat_mapping == 
needs_compat_mapping || !needs_compat_mapping) + && version->type == semantics[i].shader_type) { - *usage = semantics[i].usage; + *sysval_semantic = semantics[i].semantic; return true; } } @@ -2847,7 +3195,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant if (!needs_compat_mapping) return false; - *usage = D3D_NAME_UNDEFINED; + *sysval_semantic = VKD3D_SHADER_SV_NONE; return true; } @@ -2865,110 +3213,46 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, ctx->result = buffer->status; } -static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) { + bool output = tag == TAG_OSGN || tag == TAG_PCSG; struct vkd3d_bytecode_buffer buffer = {0}; - struct vkd3d_string_buffer *string; - const struct hlsl_ir_var *var; - size_t count_position; unsigned int i; - bool ret; - count_position = put_u32(&buffer, 0); + put_u32(&buffer, signature->element_count); put_u32(&buffer, 8); /* unknown */ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < signature->element_count; ++i) { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; + const struct signature_element *element = &signature->elements[i]; + enum vkd3d_shader_sysval_semantic sysval; + uint32_t used_mask = element->used_mask; - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - VKD3D_ASSERT(ret); - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) - { - reg_idx = has_idx ? 
var->semantic.index : ~0u; - } - else - { - VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - - use_mask = width; /* FIXME: accurately report use mask */ if (output) - use_mask = 0xf ^ use_mask; + used_mask = 0xf ^ used_mask; - /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ - if (usage >= 64) - usage = 0; + sysval = element->sysval_semantic; + if (sysval >= VKD3D_SHADER_SV_TARGET) + sysval = VKD3D_SHADER_SV_NONE; put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); - switch (var->data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); - break; - - case HLSL_TYPE_INT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); - break; - - default: - if ((string = hlsl_type_to_string(ctx, var->data_type))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); - } - put_u32(&buffer, reg_idx); - put_u32(&buffer, vkd3d_make_u16(width, use_mask)); + put_u32(&buffer, element->semantic_index); + put_u32(&buffer, sysval); + put_u32(&buffer, element->component_type); + put_u32(&buffer, element->register_index); + put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask)); } - i = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < signature->element_count; ++i) { - const char *semantic = var->semantic.name; + const struct signature_element *element = &signature->elements[i]; size_t string_offset; - D3D_NAME usage; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - hlsl_sm4_usage_from_semantic(ctx, 
&var->semantic, output, &usage); - if (usage == ~0u) - continue; - if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) - string_offset = put_string(&buffer, "SV_Target"); - else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) - string_offset = put_string(&buffer, "SV_Depth"); - else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) - string_offset = put_string(&buffer, "SV_Position"); - else - string_offset = put_string(&buffer, semantic); - set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); + string_offset = put_string(&buffer, element->semantic_name); + set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); } - set_u32(&buffer, count_position, i); - - add_section(ctx, dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); + add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); } static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) @@ -2990,6 +3274,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: case HLSL_CLASS_STRUCT: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: @@ -3123,24 +3408,24 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) vkd3d_unreachable(); } -static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) { switch (type->e.resource.format->e.numeric.type) { case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; + return VKD3D_SM4_DATA_DOUBLE; case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - return D3D_RETURN_TYPE_FLOAT; + return VKD3D_SM4_DATA_FLOAT; case HLSL_TYPE_INT: - return D3D_RETURN_TYPE_SINT; + return VKD3D_SM4_DATA_INT; break; case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - return D3D_RETURN_TYPE_UINT; + return VKD3D_SM4_DATA_UINT; default: vkd3d_unreachable(); @@ 
-3398,6 +3683,48 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un return extern_resources; } +/* For some reason, for matrices, values from default value initializers end up in different + * components than from regular initializers. Default value initializers fill the matrix in + * vertical reading order (left-to-right top-to-bottom) instead of regular reading order + * (top-to-bottom left-to-right), so they have to be adjusted. + * An exception is that the order of matrix initializers for function parameters are row-major + * (top-to-bottom left-to-right). */ +static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index) +{ + unsigned int element_comp_count, element, x, y, i; + unsigned int base = 0; + + switch (type->class) + { + case HLSL_CLASS_MATRIX: + x = index / type->dimy; + y = index % type->dimy; + return y * type->dimx + x; + + case HLSL_CLASS_ARRAY: + element_comp_count = hlsl_type_component_count(type->e.array.type); + element = index / element_comp_count; + base = element * element_comp_count; + return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base); + + case HLSL_CLASS_STRUCT: + for (i = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_type *field_type = type->e.record.fields[i].type; + + element_comp_count = hlsl_type_component_count(field_type); + if (index - base < element_comp_count) + return base + get_component_index_from_default_initializer_index(field_type, index - base); + base += element_comp_count; + } + break; + + default: + return index; + } + vkd3d_unreachable(); +} + static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); @@ -3471,7 +3798,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { unsigned int dimx = resource->component_type->e.resource.format->dimx; - put_u32(&buffer, sm4_resource_format(resource->component_type)); + put_u32(&buffer, sm4_data_type(resource->component_type)); put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; @@ -3552,7 +3879,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { uint32_t flags = 0; - if (var->last_read) + if (var->is_read) flags |= D3D_SVF_USED; put_u32(&buffer, 0); /* name */ @@ -3598,7 +3925,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) for (k = 0; k < comp_count; ++k) { struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); - unsigned int comp_offset; + unsigned int comp_offset, comp_index; enum hlsl_regset regset; if (comp_type->class == HLSL_CLASS_STRING) @@ -3608,7 +3935,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) continue; } - comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset); + comp_index = get_component_index_from_default_initializer_index(var->data_type, k); + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, &regset); if (regset == HLSL_REGSET_NUMERIC) { if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) @@ -3779,11 +4107,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); } -static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, +static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref, struct
sm4_instruction *sm4_instr) { - const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); const struct hlsl_ir_var *var = deref->var; + struct hlsl_ctx *ctx = tpf->ctx; if (var->is_uniform) { @@ -3793,7 +4123,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_RESOURCE; reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ @@ -3812,7 +4142,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_UAV; reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ @@ -3831,7 +4161,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_SAMPLER; reg->dimension = VSIR_DIMENSION_NONE; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ @@ -3853,7 +4183,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); reg->type = VKD3DSPR_CONSTBUFFER; reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->buffer->reg.id; reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ @@ -3873,7 +4203,7 @@ static void 
sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { bool has_idx; - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, &has_idx)) + if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx)) { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); @@ -3883,7 +4213,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re reg->idx_count = 1; } - reg->dimension = VSIR_DIMENSION_VEC4; + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); } else @@ -3902,7 +4235,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { bool has_idx; - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, &has_idx)) + if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); @@ -3912,7 +4245,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re reg->idx_count = 1; } - if (reg->type == VKD3DSPR_DEPTHOUT) + if (shader_sm4_is_scalar_register(reg)) reg->dimension = VSIR_DIMENSION_SCALAR; else reg->dimension = VSIR_DIMENSION_VEC4; @@ -3938,13 +4271,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } } -static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, +static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) { unsigned int hlsl_swizzle; uint32_t writemask; - sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); + sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup,
src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) { hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); @@ -3982,7 +4315,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, } } -static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, +static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, const struct hlsl_ir_node *instr, uint32_t map_writemask) { unsigned int hlsl_swizzle; @@ -4018,7 +4351,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ return 0; } -static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, +static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { const struct vkd3d_sm4_register_type_info *register_type_info; @@ -4078,7 +4411,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v return token; } -static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, +static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, unsigned int j) { unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); @@ -4108,7 +4441,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct } } -static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) +static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = 0; @@ -4121,7 +4454,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk sm4_write_register_index(tpf, &dst->reg, j); } -static void sm4_write_src_register(const 
struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) +static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = 0, mod_token = 0; @@ -4182,10 +4515,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk } } -static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) +static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) +{ + enum vkd3d_shader_type shader_type = tpf->program->shader_version.type; + enum vkd3d_sm4_stat_field stat_field; + uint32_t opcode; + + ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT]; + + opcode = instr->opcode & VKD3D_SM4_OPCODE_MASK; + stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, opcode); + + switch (opcode) + { + case VKD3D_SM4_OP_DCL_TEMPS: + tpf->stat->fields[stat_field] = max(tpf->stat->fields[stat_field], instr->idx[0]); + break; + case VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY: + case VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE: + tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM4_PRIMITIVE_TYPE_MASK) + >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + break; + case VKD3D_SM4_OP_DCL_VERTICES_OUT: + case VKD3D_SM5_OP_DCL_GS_INSTANCES: + tpf->stat->fields[stat_field] = instr->idx[0]; + break; + case VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN: + case VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING: + case VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + break; + case VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT: + case VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT: + if ((shader_type == VKD3D_SHADER_TYPE_HULL && opcode == VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT) + || (shader_type == VKD3D_SHADER_TYPE_DOMAIN + && opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT)) + { + tpf->stat->fields[stat_field] = 
(instr->opcode & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; + } + break; + default: + ++tpf->stat->fields[stat_field]; + } +} + +static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = instr->opcode | instr->extra_bits; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; unsigned int size, i, j; size_t token_position; @@ -4218,6 +4596,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t); token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); set_u32(buffer, token_position, token); + + sm4_update_stat_counters(tpf, instr); } static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, @@ -4247,7 +4627,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, return true; } -static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) +static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) { size_t size = (cbuffer->used_size + 3) / 4; @@ -4282,7 +4662,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) +static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) { unsigned int i; struct sm4_instruction instr = @@ -4323,9 +4703,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex } } -static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, +static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, bool uav) { + const struct 
vkd3d_shader_version *version = &tpf->program->shader_version; enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; struct hlsl_type *component_type; struct sm4_instruction instr; @@ -4348,21 +4729,21 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex .dsts[0].reg.idx_count = 1, .dst_count = 1, - .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx[0] = sm4_data_type(component_type) * 0x1111, .idx_count = 1, }; multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; - if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) + if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) { hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Multisampled texture object declaration needs sample count for profile %s.", - tpf->ctx->profile->name); + "Multisampled texture object declaration needs sample count for profile %u.%u.", + version->major, version->minor); } - if (hlsl_version_ge(tpf->ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { VKD3D_ASSERT(!i); instr.dsts[0].reg.idx[0].offset = resource->id; @@ -4408,11 +4789,12 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex } } -static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) +static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, + const struct hlsl_ir_var *var, bool is_patch_constant_func) { - const struct hlsl_profile_info *profile = tpf->ctx->profile; + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const bool output = var->is_output_semantic; - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; bool has_idx; struct sm4_instruction instr = @@ -4421,7 +4803,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl .dst_count = 1, }; - 
if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) + if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) { if (has_idx) { @@ -4442,36 +4824,39 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; } - if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) + if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; - hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; + sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, + tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + if (semantic == ~0u) + semantic = VKD3D_SHADER_SV_NONE; if (var->is_input_semantic) { - switch (usage) + switch (semantic) { - case D3D_NAME_UNDEFINED: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + case VKD3D_SHADER_SV_NONE: + instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; break; - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_PRIMITIVE_ID: + case VKD3D_SHADER_SV_SAMPLE_INDEX: + case VKD3D_SHADER_SV_VERTEX_ID: + instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; break; default: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; break; } - if (profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (version->type == VKD3D_SHADER_TYPE_PIXEL) { enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; @@ -4510,32 +4895,32 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl } else { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; else instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; } - switch (usage) + if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: - case D3D_NAME_DEPTH_GREATER_EQUAL: - case D3D_NAME_DEPTH_LESS_EQUAL: - case D3D_NAME_TARGET: - case D3D_NAME_UNDEFINED: - break; - - default: - instr.idx_count = 1; - instr.idx[0] = usage; - break; + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET + || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); + } + else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) + { + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); + } + else + { + VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); + instr.idx_count = 1; + instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); } write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) +static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) { struct sm4_instruction instr = { @@ -4548,7 +4933,7 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, +static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, uint32_t size, uint32_t comp_count) { struct sm4_instruction instr = @@ 
-4562,7 +4947,7 @@ static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) +static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) { struct sm4_instruction instr = { @@ -4577,7 +4962,105 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 write_sm4_instruction(tpf, &instr); } -static void write_sm4_ret(const struct tpf_writer *tpf) +static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, + .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_hs_decls(const struct tpf_compiler *tpf) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_DECLS, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, + .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, + .extra_bits = 
count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, + .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, + enum vkd3d_shader_tessellator_partitioning partitioning) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, + .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, + enum vkd3d_shader_tessellator_output_primitive output_primitive) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, + .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_ret(const struct tpf_compiler *tpf) { struct sm4_instruction instr = { @@ -4587,7 +5070,7 @@ static void write_sm4_ret(const struct tpf_writer *tpf) write_sm4_instruction(tpf, &instr); } -static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) { struct sm4_instruction instr; @@ -4605,7 +5088,7 @@ static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opco write_sm4_instruction(tpf, &instr); } -static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, 
enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) { struct sm4_instruction instr; @@ -4626,7 +5109,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp write_sm4_instruction(tpf, &instr); } -static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -4645,7 +5128,7 @@ static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opc } /* dp# instructions don't map the swizzle. */ -static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -4663,7 +5146,7 @@ static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4 write_sm4_instruction(tpf, &instr); } -static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, +static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { @@ -4686,7 +5169,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t write_sm4_instruction(tpf, &instr); } -static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, const struct 
hlsl_ir_node *src3) { @@ -4706,7 +5189,7 @@ static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_op write_sm4_instruction(tpf, &instr); } -static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, +static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) @@ -4715,6 +5198,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; struct sm4_instruction instr; @@ -4769,7 +5253,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node reg->dimension = VSIR_DIMENSION_SCALAR; reg->u.immconst_u32[0] = index->value.u[0].u; } - else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) + else if (version->major == 4 && version->minor == 0) { hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); } @@ -4784,7 +5268,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node write_sm4_instruction(tpf, &instr); } -static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; @@ -4864,7 +5348,7 @@ static 
void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_deref *resource = &load->resource; const struct hlsl_ir_node *dst = &load->node; @@ -4886,7 +5370,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl write_sm4_instruction(tpf, &instr); } -static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_deref *resource = &load->resource; const struct hlsl_ir_node *dst = &load->node; @@ -4921,7 +5405,7 @@ static bool type_is_float(const struct hlsl_type *type) return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; } -static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, +static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, const struct hlsl_ir_node *arg, uint32_t mask) { struct sm4_instruction instr; @@ -4941,7 +5425,7 @@ static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct write_sm4_instruction(tpf, &instr); } -static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) { static const union { @@ -5050,7 +5534,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex } } -static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, +static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref *dst, const struct 
hlsl_ir_node *coords, const struct hlsl_ir_node *value) { struct sm4_instruction instr; @@ -5058,7 +5542,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); + sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); instr.dst_count = 1; sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -5068,7 +5552,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct write_sm4_instruction(tpf, &instr); } -static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) +static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) { struct sm4_instruction instr; @@ -5087,8 +5571,9 @@ static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, cons write_sm4_instruction(tpf, &instr); } -static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) { + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_ir_node *arg2 = expr->operands[1].node; const struct hlsl_ir_node *arg3 = expr->operands[2].node; @@ -5103,7 +5588,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex switch (expr->op) { case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: - if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) + if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) write_sm4_rasterizer_sample_count(tpf, &expr->node); else hlsl_error(tpf->ctx, &expr->node.loc, 
VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, @@ -5224,7 +5709,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { case HLSL_TYPE_FLOAT: /* SM5 comes with a RCP opcode */ - if (tpf->ctx->profile->major_version >= 5) + if (vkd3d_shader_ver_ge(version, 5, 0)) { write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); } @@ -5578,6 +6063,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); break; + case HLSL_OP3_MAD: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3); + break; + + default: + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + default: hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); } @@ -5585,7 +6087,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex hlsl_release_string_buffer(tpf->ctx, dst_type_string); } -static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) +static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) { struct sm4_instruction instr = { @@ -5614,7 +6116,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * write_sm4_instruction(tpf, &instr); } -static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) +static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) { struct sm4_instruction instr = {0}; @@ -5653,16 +6155,17 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju /* Does this variable's data come directly from the API user, rather than 
being * temporary or from a previous shader stage? * I.e. is it a uniform or VS input? */ -static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) { if (var->is_uniform) return true; - return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; + return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; } -static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) +static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) { + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const struct hlsl_type *type = load->node.data_type; struct sm4_instruction instr; @@ -5672,7 +6175,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo instr.dst_count = 1; VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) { struct hlsl_constant_value value; @@ -5700,7 +6203,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo write_sm4_instruction(tpf, &instr); } -static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) +static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) { struct sm4_instruction instr = { @@ -5715,10 +6218,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo write_sm4_instruction(tpf, &instr); } -static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, +static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node 
*coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) { + const struct vkd3d_shader_version *version = &tpf->program->shader_version; struct vkd3d_shader_src_param *src; struct sm4_instruction instr; @@ -5735,7 +6239,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - if (tpf->ctx->profile->major_version < 5) + if (!vkd3d_shader_ver_ge(version, 5, 0)) { hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); @@ -5756,7 +6260,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *sample_index = load->sample_index.node; @@ -5825,7 +6329,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h } } -static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) +static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) { struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); @@ -5844,7 +6348,7 @@ static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); } -static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) +static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) { const struct hlsl_ir_node *rhs = store->rhs.node; struct 
sm4_instruction instr; @@ -5853,7 +6357,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV; - sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); + sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1; @@ -5863,7 +6367,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s write_sm4_instruction(tpf, &instr); } -static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) +static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) { const struct hlsl_ir_node *selector = s->selector.node; struct hlsl_ir_switch_case *c; @@ -5903,7 +6407,7 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) +static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) { unsigned int hlsl_swizzle; struct sm4_instruction instr; @@ -5924,7 +6428,7 @@ static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir write_sm4_instruction(tpf, &instr); } -static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) +static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr; @@ -5998,18 +6502,65 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc } } -static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) { - const 
struct hlsl_profile_info *profile = ctx->profile; + struct hlsl_ctx *ctx = tpf->ctx; + const struct hlsl_scope *scope; + const struct hlsl_ir_var *var; + uint32_t temp_count; + + compute_liveness(ctx, func); + mark_indexable_vars(ctx, func); + temp_count = allocate_temp_registers(ctx, func); + if (ctx->result) + return; + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) + || (var->is_output_semantic && var->first_write)) + tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); + } + + if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) + write_sm4_dcl_thread_group(tpf, ctx->thread_count); + + if (temp_count) + write_sm4_dcl_temps(tpf, temp_count); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + continue; + + if (var->indexable) + { + unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; + unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + + write_sm4_dcl_indexable_temp(tpf, id, size, 4); + } + } + } + + write_sm4_block(tpf, &func->body); + + write_sm4_ret(tpf); +} + +static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) +{ + const struct vkd3d_shader_version *version = &tpf->program->shader_version; struct vkd3d_bytecode_buffer buffer = {0}; struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; - const struct hlsl_scope *scope; - const struct hlsl_ir_var *var; + struct hlsl_ctx *ctx = tpf->ctx; size_t token_count_position; - struct tpf_writer tpf; static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -6024,17 +6575,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, 
VKD3D_SM4_LIB, }; - tpf_writer_init(&tpf, ctx, &buffer); + tpf->buffer = &buffer; extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); + put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); token_count_position = put_u32(&buffer, 0); + if (version->type == VKD3D_SHADER_TYPE_HULL) + { + tpf_write_hs_decls(tpf); + + tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ + tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); + tpf_write_dcl_tessellator_domain(tpf, ctx->domain); + tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); + tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); + } + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&tpf, cbuffer); + write_sm4_dcl_constant_buffer(tpf, cbuffer); } for (i = 0; i < extern_resources_count; ++i) @@ -6042,59 +6604,40 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct extern_resource *resource = &extern_resources[i]; if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); + write_sm4_dcl_samplers(tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(&tpf, resource, false); + write_sm4_dcl_textures(tpf, resource, false); else if (resource->regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(&tpf, resource, true); + write_sm4_dcl_textures(tpf, resource, true); } - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(&tpf, var); - } + if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) + 
write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&tpf, ctx->thread_count); + if (version->type == VKD3D_SHADER_TYPE_HULL) + tpf_write_hs_control_point_phase(tpf); - if (ctx->temp_count) - write_sm4_dcl_temps(&tpf, ctx->temp_count); + tpf_write_shader_function(tpf, entry_func); - LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + if (version->type == VKD3D_SHADER_TYPE_HULL) { - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) - { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) - continue; - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - continue; - - if (var->indexable) - { - unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; - unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; - - write_sm4_dcl_indexable_temp(&tpf, id, size, 4); - } - } + tpf_write_hs_fork_phase(tpf); + tpf_write_shader_function(tpf, ctx->patch_constant_func); } - write_sm4_block(&tpf, &entry_func->body); - - write_sm4_ret(&tpf); - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - add_section(ctx, dxbc, TAG_SHDR, &buffer); + add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); + tpf->buffer = NULL; sm4_free_extern_resources(extern_resources, extern_resources_count); } -static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +static void tpf_write_sfi0(struct tpf_compiler *tpf) { struct extern_resource *extern_resources; unsigned int extern_resources_count; + struct hlsl_ctx *ctx = tpf->ctx; uint64_t *flags; flags = vkd3d_calloc(1, sizeof(*flags)); @@ -6110,29 +6653,94 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ - if (flags) - dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); + if (*flags) + dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags)); else vkd3d_free(flags); } -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +static void tpf_write_stat(struct tpf_compiler *tpf) { - struct dxbc_writer dxbc; + struct vkd3d_bytecode_buffer buffer = {0}; + const struct sm4_stat *stat = tpf->stat; + struct hlsl_ctx *ctx = tpf->ctx; + + put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); + put_u32(&buffer, 0); /* Def count */ + put_u32(&buffer, 0); /* DCL count */ + put_u32(&buffer, stat->fields[VKD3D_STAT_FLOAT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_INT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_UINT]); + put_u32(&buffer, 0); /* Static flow control count */ + put_u32(&buffer, 0); /* Dynamic flow control count */ + put_u32(&buffer, 0); /* Macro instruction count */ + put_u32(&buffer, 0); /* Temp array count */ + put_u32(&buffer, 0); /* Array instr count */ + put_u32(&buffer, stat->fields[VKD3D_STAT_CUT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_EMIT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_LOAD]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_C]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_BIAS]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_GRAD]); + put_u32(&buffer, stat->fields[VKD3D_STAT_MOV]); + put_u32(&buffer, stat->fields[VKD3D_STAT_MOVC]); + put_u32(&buffer, stat->fields[VKD3D_STAT_CONV]); + put_u32(&buffer, stat->fields[VKD3D_STAT_BITWISE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_INPUT_PRIMITIVE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_OUTPUT_TOPOLOGY]); + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_VERTICES_OUT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_GATHER]); + put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); + 
put_u32(&buffer, 0); /* Sample frequency */ + + if (hlsl_version_ge(ctx, 5, 0)) + { + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_OUTPUT_PRIMITIVE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_PARTITIONING]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_DOMAIN]); + put_u32(&buffer, stat->fields[VKD3D_STAT_BARRIER]); + put_u32(&buffer, stat->fields[VKD3D_STAT_ATOMIC]); + put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); + } + + add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); +} + +/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving + * data from the other parameters instead, so they can be removed from the + * arguments and this function can be independent of HLSL structs. */ +int tpf_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct tpf_compiler tpf = {0}; + struct sm4_stat stat = {0}; size_t i; int ret; - dxbc_writer_init(&dxbc); - - write_sm4_signature(ctx, &dxbc, false); - write_sm4_signature(ctx, &dxbc, true); - write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); - write_sm4_sfi0(ctx, &dxbc); + tpf.ctx = ctx; + tpf.program = program; + tpf.buffer = NULL; + tpf.stat = &stat; + init_sm4_lookup_tables(&tpf.lookup); + dxbc_writer_init(&tpf.dxbc); + + tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); + tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); + if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) + tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); + write_sm4_rdef(ctx, &tpf.dxbc); + tpf_write_shdr(&tpf, entry_func); + tpf_write_sfi0(&tpf); + tpf_write_stat(&tpf); if (!(ret = ctx->result)) - ret = dxbc_writer_write(&dxbc, out); - for (i = 0; i < 
dxbc.section_count; ++i) - vkd3d_shader_free_shader_code(&dxbc.sections[i].data); + ret = dxbc_writer_write(&tpf.dxbc, out); + for (i = 0; i < tpf.dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 306c1ca0dd8..ca012d4948a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -443,20 +445,47 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char bytecode_set_bytes(buffer, offset, string, length); } -static void vkd3d_shader_dump_blob(const char *path, const char *profile, - const char *suffix, const void *data, size_t size) +struct shader_dump_data +{ + uint8_t checksum[16]; + const char *path; + const char *profile; + const char *source_suffix; + const char *target_suffix; +}; + +static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, + const void *data, size_t size, bool source) { - static unsigned int shader_id = 0; + static const char hexadecimal_digits[] = "0123456789abcdef"; + const uint8_t *checksum = dump_data->checksum; + char str_checksum[33]; + unsigned int pos = 0; char filename[1024]; - unsigned int id; + unsigned int i; FILE *f; - id = vkd3d_atomic_increment_u32(&shader_id) - 1; + if (!dump_data->path) + return; + + for (i = 0; i < ARRAY_SIZE(dump_data->checksum); ++i) + { + str_checksum[2 * i] = hexadecimal_digits[checksum[i] >> 4]; + str_checksum[2 * i + 1] = hexadecimal_digits[checksum[i] & 0xf]; + } + str_checksum[32] = '\0'; + + pos = snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s", dump_data->path, str_checksum); + + if (dump_data->profile) + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - 
pos, "-%s", dump_data->profile); - if (profile) - snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u-%s.%s", path, id, profile, suffix); + if (source) + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix); else - snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u.%s", path, id, suffix); + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix); + + TRACE("Dumping shader to \"%s\".\n", filename); if ((f = fopen(filename, "wb"))) { if (fwrite(data, 1, size, f) != size) @@ -488,37 +517,61 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t } } -void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info) +static const char *shader_get_target_type_suffix(enum vkd3d_shader_target_type type) +{ + switch (type) + { + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + return "spv"; + case VKD3D_SHADER_TARGET_SPIRV_TEXT: + return "spv.s"; + case VKD3D_SHADER_TARGET_D3D_ASM: + return "d3d.s"; + case VKD3D_SHADER_TARGET_D3D_BYTECODE: + return "d3dbc"; + case VKD3D_SHADER_TARGET_DXBC_TPF: + return "dxbc"; + case VKD3D_SHADER_TARGET_GLSL: + return "glsl"; + case VKD3D_SHADER_TARGET_FX: + return "fx"; + case VKD3D_SHADER_TARGET_MSL: + return "msl"; + default: + FIXME("Unhandled target type %#x.\n", type); + return "bin"; + } +} + +static void fill_shader_dump_data(const struct vkd3d_shader_compile_info *compile_info, + struct shader_dump_data *data) { - const struct vkd3d_shader_code *shader = &compile_info->source; - const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - const struct hlsl_profile_info *profile; - const char *profile_name = NULL; static bool enabled = true; - const char *path; + + data->path = NULL; if (!enabled) return; - if (!(path = getenv("VKD3D_SHADER_DUMP_PATH"))) + if (!(data->path = getenv("VKD3D_SHADER_DUMP_PATH"))) { enabled = false; return; } + data->profile = NULL; if 
(compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { - if (!(hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) - return; - - if (!(profile = hlsl_get_target_info(hlsl_source_info->profile))) - return; + const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - profile_name = profile->name; + if ((hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) + data->profile = hlsl_source_info->profile; } - vkd3d_shader_dump_blob(path, profile_name, shader_get_source_type_suffix(compile_info->source_type), - shader->code, shader->size); + vkd3d_compute_md5(compile_info->source.code, compile_info->source.size, + (uint32_t *)data->checksum, VKD3D_MD5_STANDARD); + data->source_suffix = shader_get_source_type_suffix(compile_info->source_type); + data->target_suffix = shader_get_target_type_suffix(compile_info->target_type); } static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) @@ -1436,7 +1489,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh descriptor_info1, combined_sampler_info, message_context); if (TRACE_ON()) - vkd3d_shader_trace(program); + vsir_program_trace(program); for (i = 0; i < program->instructions.count; ++i) { @@ -1497,6 +1550,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_dump_data dump_data; int ret; TRACE("compile_info %p, messages %p.\n", compile_info, messages); @@ -1511,7 +1565,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - vkd3d_shader_dump_shader(compile_info); + fill_shader_dump_data(compile_info, &dump_data); + vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); if 
(compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { @@ -1565,6 +1620,7 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { + struct vkd3d_shader_scan_combined_resource_sampler_info combined_sampler_info; struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; struct vkd3d_shader_compile_info scan_info; int ret; @@ -1578,9 +1634,14 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, break; case VKD3D_SHADER_TARGET_GLSL: + combined_sampler_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO; + combined_sampler_info.next = scan_info.next; + scan_info.next = &combined_sampler_info; if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) return ret; - ret = glsl_compile(program, config_flags, compile_info, out, message_context); + ret = glsl_compile(program, config_flags, &scan_descriptor_info, + &combined_sampler_info, compile_info, out, message_context); + vkd3d_shader_free_scan_combined_resource_sampler_info(&combined_sampler_info); vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; @@ -1593,6 +1654,13 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; + case VKD3D_SHADER_TARGET_MSL: + if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) + return ret; + ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + default: /* Validation should prevent us from reaching this. 
*/ vkd3d_unreachable(); @@ -1620,6 +1688,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_dump_data dump_data; int ret; TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); @@ -1634,12 +1703,17 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - vkd3d_shader_dump_shader(compile_info); + fill_shader_dump_data(compile_info, &dump_data); + vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { ret = compile_hlsl(compile_info, out, &message_context); } + else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX) + { + ret = fx_parse(compile_info, out, &message_context); + } else { uint64_t config_flags = vkd3d_shader_init_config_flags(); @@ -1676,6 +1750,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, } } + vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); + vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY; @@ -1777,6 +1853,8 @@ void shader_signature_cleanup(struct shader_signature *signature) } vkd3d_free(signature->elements); signature->elements = NULL; + signature->elements_capacity = 0; + signature->element_count = 0; } int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, @@ -1868,6 +1946,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns #ifdef VKD3D_SHADER_UNSUPPORTED_DXIL VKD3D_SHADER_SOURCE_DXBC_DXIL, #endif + VKD3D_SHADER_SOURCE_FX, }; TRACE("count %p.\n", count); @@ -1888,6 +1967,9 @@ const enum vkd3d_shader_target_type 
*vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, #ifdef VKD3D_SHADER_UNSUPPORTED_GLSL VKD3D_SHADER_TARGET_GLSL, +#endif +#ifdef VKD3D_SHADER_UNSUPPORTED_MSL + VKD3D_SHADER_TARGET_MSL, #endif }; @@ -1923,6 +2005,11 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( }; #endif + static const enum vkd3d_shader_target_type fx_types[] = + { + VKD3D_SHADER_TARGET_D3D_ASM, + }; + TRACE("source_type %#x, count %p.\n", source_type, count); switch (source_type) @@ -1945,6 +2032,10 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( return dxbc_dxil_types; #endif + case VKD3D_SHADER_SOURCE_FX: + *count = ARRAY_SIZE(fx_types); + return fx_types; + default: *count = 0; return NULL; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index ef66a8ca07a..9df538a0da0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -59,6 +59,9 @@ #define VKD3D_VEC4_SIZE 4 #define VKD3D_DVEC2_SIZE 2 +#define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) +#define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) + enum vkd3d_shader_error { VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, @@ -80,6 +83,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL = 1010, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, @@ -152,6 +156,13 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, + VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE = 5033, + 
VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE = 5034, + VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN = 5035, + VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT = 5036, + VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037, + VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038, + VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -159,8 +170,11 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE = 5306, VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND = 6001, + VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED = 6002, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF = 7000, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN = 7001, @@ -169,6 +183,11 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE = 7007, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT = 7008, + VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED = 7009, + VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE = 7010, + VKD3D_SHADER_ERROR_D3DBC_INVALID_WRITEMASK = 7011, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, @@ -225,8 +244,18 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, + VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, + VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + + VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000, + VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND = 10001, + + 
VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, + VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, + VKD3D_SHADER_ERROR_FX_INVALID_DATA = 11002, }; enum vkd3d_shader_opcode @@ -625,6 +654,13 @@ enum vkd3d_shader_register_type VKD3DSPR_INVALID = ~0u, }; +enum vsir_rastout_register +{ + VSIR_RASTOUT_POSITION = 0x0, + VSIR_RASTOUT_FOG = 0x1, + VSIR_RASTOUT_POINT_SIZE = 0x2, +}; + enum vkd3d_shader_register_precision { VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, @@ -642,9 +678,6 @@ enum vkd3d_data_type { VKD3D_DATA_FLOAT, VKD3D_DATA_INT, - VKD3D_DATA_RESOURCE, - VKD3D_DATA_SAMPLER, - VKD3D_DATA_UAV, VKD3D_DATA_UINT, VKD3D_DATA_UNORM, VKD3D_DATA_SNORM, @@ -1042,6 +1075,9 @@ enum vkd3d_shader_input_sysval_semantic struct signature_element { + /* sort_index is not a property of the signature element, it is just a + * convenience field used to retain the original order in a signature and + * recover it after having permuted the signature itself. */ unsigned int sort_index; const char *semantic_name; unsigned int semantic_index; @@ -1145,7 +1181,7 @@ struct vkd3d_shader_tgsm_structured bool zero_init; }; -struct vkd3d_shader_thread_group_size +struct vsir_thread_group_size { unsigned int x, y, z; }; @@ -1224,7 +1260,7 @@ struct vkd3d_shader_instruction struct vkd3d_shader_structured_resource structured_resource; struct vkd3d_shader_tgsm_raw tgsm_raw; struct vkd3d_shader_tgsm_structured tgsm_structured; - struct vkd3d_shader_thread_group_size thread_group_size; + struct vsir_thread_group_size thread_group_size; enum vkd3d_tessellator_domain tessellator_domain; enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive; enum vkd3d_shader_tessellator_partitioning tessellator_partitioning; @@ -1344,8 +1380,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins struct vkd3d_shader_immediate_constant_buffer *icb); bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, unsigned int dst, 
unsigned int src); -struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions); void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); enum vkd3d_shader_config_flags @@ -1353,6 +1387,12 @@ enum vkd3d_shader_config_flags VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, }; +enum vsir_control_flow_type +{ + VSIR_CF_STRUCTURED, + VSIR_CF_BLOCKS, +}; + struct vsir_program { struct vkd3d_shader_version shader_version; @@ -1367,11 +1407,16 @@ struct vsir_program bool free_parameters; unsigned int input_control_point_count, output_control_point_count; + struct vsir_thread_group_size thread_group_size; unsigned int flat_constant_count[3]; unsigned int block_count; unsigned int temp_count; unsigned int ssa_count; bool use_vocp; + bool has_point_size; + enum vsir_control_flow_type cf_type; + bool normalised_io; + bool normalised_hull_cp_io; const char **block_names; size_t block_name_count; @@ -1384,11 +1429,17 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( const struct vsir_program *program, enum vkd3d_shader_parameter_name name); bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve); -enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, + bool normalised_io); +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, const char *source_name, struct 
vkd3d_shader_message_context *message_context); +struct vkd3d_shader_src_param *vsir_program_create_outpointid_param( + struct vsir_program *program); +bool vsir_instruction_init_with_params(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count); static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) @@ -1445,7 +1496,7 @@ struct vkd3d_shader_scan_descriptor_info1 unsigned int descriptor_count; }; -void vkd3d_shader_trace(const struct vsir_program *program); +void vsir_program_trace(const struct vsir_program *program); const char *shader_get_type_prefix(enum vkd3d_shader_type type); @@ -1465,6 +1516,7 @@ enum vsir_asm_flags { VSIR_ASM_FLAG_NONE = 0, VSIR_ASM_FLAG_DUMP_TYPES = 0x1, + VSIR_ASM_FLAG_DUMP_ALL_INDICES = 0x2, }; enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, @@ -1549,18 +1601,29 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, enum vkd3d_shader_error error, const char *format, va_list args); -void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); uint64_t vkd3d_shader_init_config_flags(void); void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); #define vkd3d_shader_trace_text(text, size) \ vkd3d_shader_trace_text_(text, size, __FUNCTION__) +bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); +bool sm1_usage_from_semantic_name(const char *semantic_name, + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); +bool 
sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); +bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, + const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); + int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, struct vkd3d_shader_message_context *message_context, struct vsir_program *program); int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, struct vkd3d_shader_message_context *message_context, struct vsir_program *program); int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, struct vkd3d_shader_message_context *message_context, struct vsir_program *program); +int fx_parse(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); @@ -1570,8 +1633,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); int glsl_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); #define SPIRV_MAX_SRC_COUNT 6 @@ -1580,7 +1645,17 @@ int 
spirv_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); +int msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + +enum vkd3d_md5_variant +{ + VKD3D_MD5_STANDARD, + VKD3D_MD5_DXBC, +}; + +void vkd3d_compute_md5(const void *dxbc, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant); int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); @@ -1853,7 +1928,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) -#define DXBC_MAX_SECTION_COUNT 5 +#define DXBC_MAX_SECTION_COUNT 7 struct dxbc_writer { diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index dcc7690876f..5495809fcb9 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -19,6 +19,7 @@ */ #include "vkd3d_private.h" +#include static void d3d12_fence_incref(struct d3d12_fence *fence); static void d3d12_fence_decref(struct d3d12_fence *fence); @@ -2451,6 +2452,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL } list->is_recording = false; + list->has_depth_bounds = false; if (!list->is_valid) { @@ -2479,7 +2481,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, list->fb_layer_count = 0; list->xfb_enabled = false; - + list->has_depth_bounds = false; list->is_predicated = false; list->current_framebuffer = 
VK_NULL_HANDLE; @@ -2793,39 +2795,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des /* We use separate bindings for buffer and texture SRVs/UAVs. * See d3d12_root_signature_init(). For unbounded ranges the * descriptors exist in two consecutive sets, otherwise they occur - * in pairs in one set. */ - if (range->descriptor_count == UINT_MAX) - { - if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - { - vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; - vk_descriptor_write->dstBinding = 0; - } - } - else - { - if (!use_array) - vk_descriptor_write->dstBinding = vk_binding + 2 * index; - if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - ++vk_descriptor_write->dstBinding; - } - + * as consecutive ranges within a set. */ if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; + break; + } + + if (range->descriptor_count == UINT_MAX) + { + vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; + vk_descriptor_write->dstBinding = 0; } else { - vk_image_info->sampler = VK_NULL_HANDLE; - vk_image_info->imageView = u.view->v.u.vk_image_view; - vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; - - vk_descriptor_write->pImageInfo = vk_image_info; + vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; } + + vk_image_info->sampler = VK_NULL_HANDLE; + vk_image_info->imageView = u.view->v.u.vk_image_view; + vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV + ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; + + vk_descriptor_write->pImageInfo = vk_image_info; break; case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: @@ -3078,7 +3071,7 @@ done: vkd3d_free(vk_descriptor_writes); } -static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, +static void d3d12_command_list_update_virtual_descriptors(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; @@ -3210,6 +3203,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) { + if (!list->device->use_vk_heaps) + return; + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) { if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) @@ -3296,6 +3292,15 @@ static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); } +static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + if (list->device->use_vk_heaps) + d3d12_command_list_update_heap_descriptors(list, bind_point); + else + d3d12_command_list_update_virtual_descriptors(list, bind_point); +} + static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); @@ -3303,7 +3308,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false; - list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); return true; } @@ -3320,7 +3325,7 @@ static bool d3d12_command_list_begin_render_pass(struct 
d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false; - list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); if (list->current_render_pass != VK_NULL_HANDLE) return true; @@ -3351,6 +3356,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list list->xfb_enabled = true; } + if (graphics->ds_desc.depthBoundsTestEnable && !list->has_depth_bounds) + { + list->has_depth_bounds = true; + VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, 0.0f, 1.0f)); + } + return true; } @@ -5939,7 +5950,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, FLOAT min, FLOAT max) { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + + TRACE("iface %p, min %.8e, max %.8e.\n", iface, min, max); + + if (isnan(max)) + max = 0.0f; + if (isnan(min)) + min = 0.0f; + + if (!list->device->vk_info.EXT_depth_range_unrestricted && (min < 0.0f || min > 1.0f || max < 0.0f || max > 1.0f)) + { + WARN("VK_EXT_depth_range_unrestricted was not found, clamping depth bounds to 0.0 and 1.0.\n"); + max = vkd3d_clamp(max, 0.0f, 1.0f); + min = vkd3d_clamp(min, 0.0f, 1.0f); + } + + list->has_depth_bounds = true; + VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, min, max)); } static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, @@ -6189,8 +6218,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->allocator = allocator; - list->update_descriptors = device->use_vk_heaps ? 
d3d12_command_list_update_heap_descriptors - : d3d12_command_list_update_descriptors; list->descriptor_heap_count = 0; if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 01841c89692..65339c7ba5d 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -102,6 +102,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_RANGE_UNRESTRICTED, EXT_depth_range_unrestricted), VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 682d488faa8..8e5ec70a577 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi } } +static VkShaderStageFlags stage_flags_from_vkd3d_shader_visibility(enum vkd3d_shader_visibility visibility) +{ + switch (visibility) + { + case VKD3D_SHADER_VISIBILITY_ALL: + return VK_SHADER_STAGE_ALL; + case VKD3D_SHADER_VISIBILITY_VERTEX: + return VK_SHADER_STAGE_VERTEX_BIT; + case VKD3D_SHADER_VISIBILITY_HULL: + return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + case VKD3D_SHADER_VISIBILITY_DOMAIN: + return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + case VKD3D_SHADER_VISIBILITY_GEOMETRY: + return VK_SHADER_STAGE_GEOMETRY_BIT; + case VKD3D_SHADER_VISIBILITY_PIXEL: + return VK_SHADER_STAGE_FRAGMENT_BIT; + case VKD3D_SHADER_VISIBILITY_COMPUTE: + return VK_SHADER_STAGE_COMPUTE_BIT; + default: + FIXME("Unhandled visibility %#x.\n", 
visibility); + return VKD3D_SHADER_VISIBILITY_ALL; + } +} + static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility) { switch (visibility) @@ -260,23 +284,6 @@ static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d } } -static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type) -{ - switch (type) - { - /* SRV and UAV root parameters are buffer views. */ - case D3D12_ROOT_PARAMETER_TYPE_SRV: - return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - case D3D12_ROOT_PARAMETER_TYPE_UAV: - return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - case D3D12_ROOT_PARAMETER_TYPE_CBV: - return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - default: - FIXME("Unhandled descriptor root parameter type %#x.\n", type); - return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - } -} - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( D3D12_DESCRIPTOR_RANGE_TYPE type) { @@ -313,20 +320,6 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p } } -static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc, - enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility, - bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count) -{ - binding_desc->binding = vk_binding; - binding_desc->descriptorType - = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer); - binding_desc->descriptorCount = descriptor_count; - binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility); - binding_desc->pImmutableSamplers = NULL; - - return true; -} - struct d3d12_root_signature_info { size_t binding_count; @@ -719,18 +712,66 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat return S_OK; } +struct vk_binding_array +{ + VkDescriptorSetLayoutBinding *bindings; + size_t capacity, count; + + unsigned int 
table_index; + unsigned int unbounded_offset; + VkDescriptorSetLayoutCreateFlags flags; +}; + +static void vk_binding_array_cleanup(struct vk_binding_array *array) +{ + vkd3d_free(array->bindings); + array->bindings = NULL; +} + +static bool vk_binding_array_add_binding(struct vk_binding_array *array, + VkDescriptorType descriptor_type, unsigned int descriptor_count, + VkShaderStageFlags stage_flags, const VkSampler *immutable_sampler, unsigned int *binding_idx) +{ + unsigned int binding_count = array->count; + VkDescriptorSetLayoutBinding *binding; + + if (!vkd3d_array_reserve((void **)&array->bindings, &array->capacity, + array->count + 1, sizeof(*array->bindings))) + { + ERR("Failed to reallocate the Vulkan binding array.\n"); + return false; + } + + *binding_idx = binding_count; + binding = &array->bindings[binding_count]; + binding->binding = binding_count; + binding->descriptorType = descriptor_type; + binding->descriptorCount = descriptor_count; + binding->stageFlags = stage_flags; + binding->pImmutableSamplers = immutable_sampler; + ++array->count; + + return true; +} + struct vkd3d_descriptor_set_context { - VkDescriptorSetLayoutBinding *current_binding; - VkDescriptorSetLayoutBinding *first_binding; + struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; unsigned int table_index; unsigned int unbounded_offset; unsigned int descriptor_index; unsigned int uav_counter_index; unsigned int push_constant_index; - uint32_t descriptor_binding; }; +static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(context->vk_bindings); ++i) + vk_binding_array_cleanup(&context->vk_bindings[i]); +} + static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count) { uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, device->vk_info.device_limits.maxBoundDescriptorSets); @@ -738,63 +779,63 @@ static bool vkd3d_validate_descriptor_set_count(struct 
d3d12_device *device, uns if (set_count > max_count) { /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ - ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); + WARN("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); return false; } return true; } -static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, - VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, - const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout); - -static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature, - struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags) +static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( + struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) { - struct d3d12_descriptor_set_layout *layout; - unsigned int index; - HRESULT hr; - - if (!context->descriptor_binding) - return S_OK; + if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) + return NULL; - index = root_signature->vk_set_count; - layout = &root_signature->descriptor_set_layouts[index]; + return &context->vk_bindings[root_signature->vk_set_count]; +} - if (!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1)) - return E_INVALIDARG; +static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, + VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) +{ + struct vk_binding_array *array; - if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding, - context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout))) - return hr; - layout->table_index = context->table_index; - layout->unbounded_offset = 
context->unbounded_offset; - ++root_signature->vk_set_count; + if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) + return; - context->current_binding = context->first_binding; - context->descriptor_binding = 0; + array->table_index = context->table_index; + array->unbounded_offset = context->unbounded_offset; + array->flags = flags; - return S_OK; + ++root_signature->vk_set_count; } static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, - bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, - unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, + unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, + unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, + const VkSampler *immutable_sampler, unsigned int *binding_idx) { struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets ? 
&root_signature->descriptor_offsets[context->descriptor_index] : NULL; - struct vkd3d_shader_resource_binding *mapping - = &root_signature->descriptor_mapping[context->descriptor_index++]; + struct vkd3d_shader_resource_binding *mapping; + struct vk_binding_array *array; + unsigned int idx; + + if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) + || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], + vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, + stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) + return E_OUTOFMEMORY; + mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; mapping->type = descriptor_type; mapping->register_space = register_space; mapping->register_index = register_idx; mapping->shader_visibility = shader_visibility; mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; mapping->binding.set = root_signature->vk_set_count; - mapping->binding.binding = context->descriptor_binding++; + mapping->binding.binding = idx; mapping->binding.count = descriptor_count; if (offset) { @@ -803,37 +844,11 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur } if (context->unbounded_offset != UINT_MAX) - return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - return S_OK; -} + if (binding_idx) + *binding_idx = idx; -static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, - unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, - enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context 
*context, - uint32_t *first_binding) -{ - unsigned int i; - HRESULT hr; - - is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; - duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV - || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - && duplicate_descriptors; - - *first_binding = context->descriptor_binding; - for (i = 0; i < binding_count; ++i) - { - if (duplicate_descriptors - && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, - register_space, base_register_idx + i, true, shader_visibility, 1, context))) - return hr; - - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, - base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) - return hr; - } return S_OK; } @@ -895,38 +910,41 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); } -static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, +static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, + unsigned int vk_binding_array_count, unsigned int bindings_per_range, struct vkd3d_descriptor_set_context *context) { enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); - bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; enum vkd3d_shader_descriptor_type descriptor_type = range->type; + unsigned int i, register_space = range->register_space; HRESULT hr; if (range->descriptor_count == UINT_MAX) context->unbounded_offset = range->offset; - if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == 
VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + for (i = 0; i < bindings_per_range; ++i) { - if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, - descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count)) - return E_NOTIMPL; - ++context->current_binding; - - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, - range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + register_space, range->base_register_idx + i, is_buffer, shader_visibility, + vk_binding_array_count, context, NULL, NULL))) return hr; } - if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, - descriptor_type, visibility, is_buffer, context->descriptor_binding, range->vk_binding_count)) - return E_NOTIMPL; - ++context->current_binding; + if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + context->unbounded_offset = UINT_MAX; + return S_OK; + } - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, - range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) - return hr; + for (i = 0; i < bindings_per_range; ++i) + { + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + register_space, range->base_register_idx + i, false, shader_visibility, + vk_binding_array_count, context, NULL, NULL))) + return hr; + } context->unbounded_offset = UINT_MAX; @@ -1107,18 +1125,19 @@ static int compare_descriptor_range(const void *a, const void *b) if ((ret = vkd3d_u32_compare(range_a->offset, range_b->offset))) return ret; - return (range_a->descriptor_count == UINT_MAX) - (range_b->descriptor_count == UINT_MAX); + /* Place bounded ranges after unbounded ones of equal offset, + * so the bounded 
range can be mapped to the unbounded one. */ + return (range_b->descriptor_count == UINT_MAX) - (range_a->descriptor_count == UINT_MAX); } static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, struct vkd3d_descriptor_set_context *context) { + unsigned int i, j, range_count, bindings_per_range, vk_binding_array_count; const struct d3d12_device *device = root_signature->device; bool use_vk_heaps = root_signature->device->use_vk_heaps; struct d3d12_root_descriptor_table *table; - unsigned int i, j, k, range_count; - uint32_t vk_binding; HRESULT hr; root_signature->descriptor_table_mask = 0; @@ -1175,7 +1194,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo for (j = 0; j < range_count; ++j) { struct d3d12_root_descriptor_table_range *range; - VkDescriptorSetLayoutBinding *cur_binding; range = &table->ranges[j]; @@ -1221,53 +1239,23 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo base_range = range; } - range->binding = context->descriptor_binding; range->vk_binding_count = vk_binding_count_from_descriptor_range(range, info, &device->vk_info.descriptor_limits); - - if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, - range, p->ShaderVisibility, context))) - return hr; - - continue; + vk_binding_array_count = range->vk_binding_count; + bindings_per_range = 1; } - - cur_binding = context->current_binding; - - if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, - shader_visibility, context, &vk_binding))) - return hr; - - /* Unroll descriptor range. 
*/ - for (k = 0; k < range->descriptor_count; ++k) + else { - uint32_t vk_current_binding = vk_binding + k; - - if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV - || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - { - vk_current_binding = vk_binding + 2 * k; - - /* Assign binding for image view. */ - if (!vk_binding_from_d3d12_descriptor_range(cur_binding, - range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1)) - return E_NOTIMPL; - - ++cur_binding; - } - - if (!vk_binding_from_d3d12_descriptor_range(cur_binding, - range->type, p->ShaderVisibility, true, vk_current_binding, 1)) - return E_NOTIMPL; - - ++cur_binding; + range->vk_binding_count = range->descriptor_count; + vk_binding_array_count = 1; + bindings_per_range = range->descriptor_count; } - table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count; - table->ranges[j].binding = vk_binding; + range->binding = context->vk_bindings[root_signature->vk_set_count].count; - context->current_binding = cur_binding; + if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, + p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) + return hr; } ++context->push_constant_index; } @@ -1278,8 +1266,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { - VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; - unsigned int i; + unsigned int binding, i; HRESULT hr; root_signature->push_descriptor_mask = 0; @@ -1294,23 +1281,16 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign root_signature->push_descriptor_mask |= 1u << i; - if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, 
vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), - p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, - vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, + vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) return hr; - cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); - cur_binding->descriptorCount = 1; - cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); - cur_binding->pImmutableSamplers = NULL; root_signature->parameters[i].parameter_type = p->ParameterType; - root_signature->parameters[i].u.descriptor.binding = cur_binding->binding; - - ++cur_binding; + root_signature->parameters[i].u.descriptor.binding = binding; } - context->current_binding = cur_binding; return S_OK; } @@ -1318,7 +1298,6 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { - VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; unsigned int i; HRESULT hr; @@ -1330,21 +1309,15 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) return hr; - if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, - vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, + vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, + 
&root_signature->static_samplers[i], NULL))) return hr; - cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - cur_binding->descriptorCount = 1; - cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); - cur_binding->pImmutableSamplers = &root_signature->static_samplers[i]; - - ++cur_binding; } - context->current_binding = cur_binding; if (device->use_vk_heaps) - return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); return S_OK; } @@ -1477,6 +1450,34 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, return S_OK; } +static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, + struct vkd3d_descriptor_set_context *context) +{ + unsigned int i; + HRESULT hr; + + d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); + + if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) + return E_INVALIDARG; + + for (i = 0; i < root_signature->vk_set_count; ++i) + { + struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; + struct vk_binding_array *array = &context->vk_bindings[i]; + + VKD3D_ASSERT(array->count); + + if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, + array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) + return hr; + layout->unbounded_offset = array->unbounded_offset; + layout->table_index = array->table_index; + } + + return S_OK; +} + static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, VkDescriptorSetLayout *vk_set_layouts) { @@ -1508,7 +1509,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; const struct vkd3d_vulkan_info 
*vk_info = &device->vk_info; struct vkd3d_descriptor_set_context context; - VkDescriptorSetLayoutBinding *binding_desc; struct d3d12_root_signature_info info; bool use_vk_heaps; unsigned int i; @@ -1516,7 +1516,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa memset(&context, 0, sizeof(context)); context.unbounded_offset = UINT_MAX; - binding_desc = NULL; root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; root_signature->refcount = 1; @@ -1578,20 +1577,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa sizeof(*root_signature->static_samplers)))) goto fail; - if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc)))) - goto fail; - context.first_binding = binding_desc; - context.current_binding = binding_desc; - if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) goto fail; /* We use KHR_push_descriptor for root descriptor parameters. 
*/ if (vk_info->KHR_push_descriptor) { - if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, - &context, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR))) - goto fail; + d3d12_root_signature_append_vk_binding_array(root_signature, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); } root_signature->main_set = root_signature->vk_set_count; @@ -1607,11 +1600,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa if (use_vk_heaps) d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); - if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) + if (FAILED(hr = d3d12_root_signature_create_descriptor_set_layouts(root_signature, &context))) goto fail; - vkd3d_free(binding_desc); - binding_desc = NULL; + descriptor_set_context_cleanup(&context); i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, @@ -1627,7 +1619,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa return S_OK; fail: - vkd3d_free(binding_desc); + descriptor_set_context_cleanup(&context); d3d12_root_signature_cleanup(root_signature, device); return hr; } @@ -3867,6 +3859,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, }; static const VkPipelineDynamicStateCreateInfo dynamic_desc = { diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index 831dc07af56..839bb173854 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -703,7 +703,7 @@ const char *debug_vk_extent_3d(VkExtent3D extent) const char *debug_vk_queue_flags(VkQueueFlags flags) { - char buffer[159]; + char buffer[191]; buffer[0] = '\0'; #define FLAG_TO_STR(f) if 
(flags & f) { strcat(buffer, " | "#f); flags &= ~f; } @@ -715,6 +715,7 @@ const char *debug_vk_queue_flags(VkQueueFlags flags) #undef FLAG_TO_STR #define FLAG_TO_STR(f, n) if (flags & f) { strcat(buffer, " | "#n); flags &= ~f; } FLAG_TO_STR(0x20, VK_QUEUE_VIDEO_DECODE_BIT_KHR) + FLAG_TO_STR(0x40, VK_QUEUE_VIDEO_ENCODE_BIT_KHR) #undef FLAG_TO_STR if (flags) FIXME("Unrecognized flag(s) %#x.\n", flags); diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 9eccec111c7..5215cf8ef86 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -415,6 +415,7 @@ HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZ if (FAILED(hr = d3d12_versioned_root_signature_deserializer_init(object, &dxbc))) { vkd3d_free(object); + *deserializer = NULL; return hr; } diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index ba4e2e8488d..e6d477a5c12 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -131,6 +131,7 @@ struct vkd3d_vulkan_info bool EXT_calibrated_timestamps; bool EXT_conditional_rendering; bool EXT_debug_marker; + bool EXT_depth_range_unrestricted; bool EXT_depth_clip_enable; bool EXT_descriptor_indexing; bool EXT_fragment_shader_interlock; @@ -1254,7 +1255,7 @@ struct d3d12_command_list VkFormat dsv_format; bool xfb_enabled; - + bool has_depth_bounds; bool is_predicated; VkFramebuffer current_framebuffer; @@ -1271,7 +1272,6 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; - void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); struct d3d12_descriptor_heap *descriptor_heaps[64]; unsigned int descriptor_heap_count; -- 2.45.2