From 8197153f5099eec17d017d9369b8e23a37963dba Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Sep 2024 07:18:49 +1000 Subject: [PATCH] Updated vkd3d to 2ac7f650a196e47a18ea1957eac5953255cf424d. --- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 4 +- libs/vkd3d/include/vkd3d_shader.h | 13 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/checksum.c | 49 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 6 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1183 ++++-------- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 19 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 4 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 106 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 755 +++++++- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 198 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 70 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 2 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 509 ++++-- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1382 +++++++++++++- .../libs/vkd3d-shader/hlsl_constant_ops.c | 20 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 1605 +++++++++-------- libs/vkd3d/libs/vkd3d-shader/msl.c | 275 +++ libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + libs/vkd3d/libs/vkd3d-shader/spirv.c | 134 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 554 +++++- .../libs/vkd3d-shader/vkd3d_shader_main.c | 119 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 53 +- libs/vkd3d/libs/vkd3d/command.c | 50 +- libs/vkd3d/libs/vkd3d/device.c | 1 + libs/vkd3d/libs/vkd3d/state.c | 3 +- libs/vkd3d/libs/vkd3d/utils.c | 3 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 4 +- 29 files changed, 4987 insertions(+), 2137 deletions(-) create mode 100644 libs/vkd3d/libs/vkd3d-shader/msl.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 94e4833dc9a..b073790d986 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -25,6 +25,7 @@ SOURCES = \ libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ libs/vkd3d-shader/ir.c \ + libs/vkd3d-shader/msl.c \ libs/vkd3d-shader/preproc.l \ libs/vkd3d-shader/preproc.y \ libs/vkd3d-shader/spirv.c \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 39145a97df1..fd62730f948 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -62,6 +62,8 @@ #define VKD3D_STRINGIFY(x) #x #define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) +#define vkd3d_clamp(value, lower, upper) max(min(value, upper), lower) + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') @@ -273,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index d9a355d3bc9..46feff35138 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -1087,6 +1087,10 @@ enum vkd3d_shader_target_type * Output is a raw FX section without container. \since 1.11 */ VKD3D_SHADER_TARGET_FX, + /** + * A 'Metal Shading Language' shader. \since 1.14 + */ + VKD3D_SHADER_TARGET_MSL, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE), }; @@ -1292,7 +1296,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, * vkd3d_shader_preprocess_info. * * \param code Contents of the included file, which were allocated by the - * \ref pfn_open_include callback. The user must free them. + * vkd3d_shader_preprocess_info.pfn_open_include callback. + * The user must free them. * * \param context The user-defined pointer passed to struct * vkd3d_shader_preprocess_info. @@ -1319,8 +1324,8 @@ struct vkd3d_shader_preprocess_info /** * Pointer to an array of predefined macros. Each macro in this array will - * be expanded as if a corresponding #define statement were prepended to the - * source code. + * be expanded as if a corresponding \#define statement were prepended to + * the source code. * * If the same macro is specified multiple times, only the last value is * used. @@ -2798,7 +2803,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ * \param input_signature The input signature of the second shader. * * \param count On output, contains the number of entries written into - * \ref varyings. + * "varyings". * * \param varyings Pointer to an output array of varyings. * This must point to space for N varyings, where N is the number of elements diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c index d9560628c77..45de1c92513 100644 --- a/libs/vkd3d/libs/vkd3d-shader/checksum.c +++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c @@ -33,6 +33,11 @@ * will fill a supplied 16-byte array with the digest. */ +/* + * DXBC uses a variation of the MD5 algorithm, which only changes the way + * the message is padded in the final step. + */ + #include "vkd3d_shader_private.h" #define DXBC_CHECKSUM_BLOCK_SIZE 64 @@ -230,10 +235,9 @@ static void md5_update(struct md5_ctx *ctx, const unsigned char *buf, unsigned i memcpy(ctx->in, buf, len); } -static void dxbc_checksum_final(struct md5_ctx *ctx) +static void md5_final(struct md5_ctx *ctx, enum vkd3d_md5_variant variant) { unsigned int padding; - unsigned int length; unsigned int count; unsigned char *p; @@ -260,7 +264,7 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) /* Now fill the next block */ memset(ctx->in, 0, DXBC_CHECKSUM_BLOCK_SIZE); } - else + else if (variant == VKD3D_MD5_DXBC) { /* Make place for bitcount at the beginning of the block */ memmove(&ctx->in[4], ctx->in, count); @@ -268,33 +272,44 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) /* Pad block to 60 bytes */ memset(p + 4, 0, padding - 4); } + else + { + /* Pad block to 56 bytes */ + memset(p, 0, padding - 8); + } /* Append length in bits and transform */ - length = ctx->i[0]; - memcpy(&ctx->in[0], &length, sizeof(length)); - byte_reverse(&ctx->in[4], 14); - length = ctx->i[0] >> 2 | 0x1; - memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); + if (variant == VKD3D_MD5_DXBC) + { + unsigned int length; + + length = ctx->i[0]; + memcpy(&ctx->in[0], &length, sizeof(length)); + byte_reverse(&ctx->in[4], 14); + length = ctx->i[0] >> 2 | 0x1; + memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); + } + else + { + byte_reverse(ctx->in, 14); + + ((unsigned int *)ctx->in)[14] = ctx->i[0]; + ((unsigned int *)ctx->in)[15] = ctx->i[1]; + } md5_transform(ctx->buf, (unsigned int *)ctx->in); byte_reverse((unsigned char *)ctx->buf, 4); memcpy(ctx->digest, ctx->buf, 16); } -#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 - -void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) +void vkd3d_compute_md5(const void *data, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant) { - const uint8_t *ptr = dxbc; + const uint8_t *ptr = data; struct md5_ctx ctx; - VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); - ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; - size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; - md5_init(&ctx); md5_update(&ctx, ptr, size); - dxbc_checksum_final(&ctx); + md5_final(&ctx, variant); memcpy(checksum, ctx.digest, sizeof(ctx.digest)); } diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 77e9711300f..cfee053d49c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -79,7 +79,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DCL_INDEXABLE_TEMP ] = "dcl_indexableTemp", [VKD3DSIH_DCL_INPUT ] = "dcl_input", [VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT ] = "dcl_input_control_point_count", - [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputPrimitive", + [VKD3DSIH_DCL_INPUT_PRIMITIVE ] = "dcl_inputprimitive", [VKD3DSIH_DCL_INPUT_PS ] = "dcl_input_ps", [VKD3DSIH_DCL_INPUT_PS_SGV ] = "dcl_input_ps_sgv", [VKD3DSIH_DCL_INPUT_PS_SIV ] = "dcl_input_ps_siv", @@ -89,7 +89,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DCL_OUTPUT ] = "dcl_output", [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT ] = "dcl_output_control_point_count", [VKD3DSIH_DCL_OUTPUT_SIV ] = "dcl_output_siv", - [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputTopology", + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY ] = "dcl_outputtopology", [VKD3DSIH_DCL_RESOURCE_RAW ] = "dcl_resource_raw", [VKD3DSIH_DCL_RESOURCE_STRUCTURED ] = "dcl_resource_structured", [VKD3DSIH_DCL_SAMPLER ] = "dcl_sampler", @@ -104,7 +104,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DCL_UAV_RAW ] = "dcl_uav_raw", [VKD3DSIH_DCL_UAV_STRUCTURED ] = "dcl_uav_structured", [VKD3DSIH_DCL_UAV_TYPED ] = "dcl_uav_typed", - [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxOutputVertexCount", + [VKD3DSIH_DCL_VERTICES_OUT ] = "dcl_maxout", [VKD3DSIH_DDIV ] = "ddiv", [VKD3DSIH_DEF ] = "def", [VKD3DSIH_DEFAULT ] = "default", diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index d05394c3ab7..10f2e5e5e6d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -104,6 +104,19 @@ enum vkd3d_sm1_resource_type VKD3D_SM1_RESOURCE_TEXTURE_3D = 0x4, }; +enum vkd3d_sm1_misc_register +{ + VKD3D_SM1_MISC_POSITION = 0x0, + VKD3D_SM1_MISC_FACE = 0x1, +}; + +enum vkd3d_sm1_rastout_register +{ + VKD3D_SM1_RASTOUT_POSITION = 0x0, + VKD3D_SM1_RASTOUT_FOG = 0x1, + VKD3D_SM1_RASTOUT_POINT_SIZE = 0x2, +}; + enum vkd3d_sm1_opcode { VKD3D_SM1_OP_NOP = 0x00, @@ -1272,7 +1285,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->end = &code[token_count]; /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + if (!vsir_program_init(program, compile_info, &version, + code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); @@ -1384,22 +1398,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, }; @@ -1423,32 +1437,32 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, } bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) { static const struct { const char *name; - D3DDECLUSAGE usage; + enum vkd3d_decl_usage usage; } semantics[] = { - {"binormal", D3DDECLUSAGE_BINORMAL}, - {"blendindices", D3DDECLUSAGE_BLENDINDICES}, - {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, - {"color", D3DDECLUSAGE_COLOR}, - {"depth", D3DDECLUSAGE_DEPTH}, - {"fog", D3DDECLUSAGE_FOG}, - {"normal", D3DDECLUSAGE_NORMAL}, - {"position", D3DDECLUSAGE_POSITION}, - {"positiont", D3DDECLUSAGE_POSITIONT}, - {"psize", D3DDECLUSAGE_PSIZE}, - {"sample", D3DDECLUSAGE_SAMPLE}, - {"sv_depth", D3DDECLUSAGE_DEPTH}, - {"sv_position", D3DDECLUSAGE_POSITION}, - {"sv_target", D3DDECLUSAGE_COLOR}, - {"tangent", D3DDECLUSAGE_TANGENT}, - {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, - {"texcoord", D3DDECLUSAGE_TEXCOORD}, + {"binormal", VKD3D_DECL_USAGE_BINORMAL}, + {"blendindices", VKD3D_DECL_USAGE_BLEND_INDICES}, + {"blendweight", VKD3D_DECL_USAGE_BLEND_WEIGHT}, + {"color", VKD3D_DECL_USAGE_COLOR}, + {"depth", VKD3D_DECL_USAGE_DEPTH}, + {"fog", VKD3D_DECL_USAGE_FOG}, + {"normal", VKD3D_DECL_USAGE_NORMAL}, + {"position", VKD3D_DECL_USAGE_POSITION}, + {"positiont", VKD3D_DECL_USAGE_POSITIONT}, + {"psize", VKD3D_DECL_USAGE_PSIZE}, + {"sample", VKD3D_DECL_USAGE_SAMPLE}, + {"sv_depth", VKD3D_DECL_USAGE_DEPTH}, + {"sv_position", VKD3D_DECL_USAGE_POSITION}, + {"sv_target", VKD3D_DECL_USAGE_COLOR}, + {"tangent", VKD3D_DECL_USAGE_TANGENT}, + {"tessfactor", VKD3D_DECL_USAGE_TESS_FACTOR}, + {"texcoord", VKD3D_DECL_USAGE_TEXCOORD}, }; unsigned int i; @@ -1468,21 +1482,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, struct d3dbc_compiler { + const struct vkd3d_sm1_opcode_info *opcode_table; struct vsir_program *program; struct vkd3d_bytecode_buffer buffer; struct vkd3d_shader_message_context *message_context; - - /* OBJECTIVE: Store all the required information in the other fields so - * that this hlsl_ctx is no longer necessary. */ - struct hlsl_ctx *ctx; + bool failed; }; static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) { - if (type == VKD3D_SHADER_TYPE_VERTEX) - return D3DVS_VERSION(major, minor); - else - return D3DPS_VERSION(major, minor); + return vkd3d_make_u32(vkd3d_make_u16(minor, major), + type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS); } D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) @@ -1860,24 +1870,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff set_u32(buffer, creator_offset, offset - ctab_start); ctab_end = bytecode_align(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); } static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) { - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); + return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) + | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); } struct sm1_instruction { - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + enum vkd3d_sm1_opcode opcode; unsigned int flags; struct sm1_dst_register { enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; + enum vkd3d_shader_dst_modifier mod; unsigned int writemask; uint32_t reg; } dst; @@ -1885,7 +1895,7 @@ struct sm1_instruction struct sm1_src_register { enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; + enum vkd3d_shader_src_modifier mod; unsigned int swizzle; uint32_t reg; } srcs[4]; @@ -1900,11 +1910,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) const struct sm1_dst_register *dst = &instr->dst; unsigned int i; - if (instr->opcode != D3DSIO_MOV) + if (instr->opcode != VKD3D_SM1_OP_MOV) return false; - if (dst->mod != D3DSPDM_NONE) + if (dst->mod != VKD3DSPDM_NONE) return false; - if (src->mod != D3DSPSM_NONE) + if (src->mod != VKD3DSPSM_NONE) return false; if (src->type != dst->type) return false; @@ -1923,13 +1933,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) { VKD3D_ASSERT(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(reg->type) + | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) + | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); } static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_src_register *reg) { - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(reg->type) + | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) + | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); } static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) @@ -1945,7 +1961,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); if (version->major > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); if (instr->has_dst) @@ -1955,346 +1971,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s write_sm1_src_register(buffer, &instr->srcs[i]); }; -static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -{ - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -} - -static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( + struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) { - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - d3dbc_write_instruction(d3dbc, &instr); -} - -static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &instr); -} - -static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &instr); -} + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i = 0; -static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = + for (;;) { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; + info = &d3dbc->opcode_table[i++]; + if (info->vkd3d_opcode == VKD3DSIH_INVALID) + return NULL; - d3dbc_write_instruction(d3dbc, &instr); + if (vkd3d_opcode == info->vkd3d_opcode + && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) + && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) + || !info->max_version.major)) + return info; + } } -static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src, - D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +static uint32_t swizzle_from_vsir(uint32_t swizzle) { - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, + uint32_t x = vsir_swizzle_get_component(swizzle, 0); + uint32_t y = vsir_swizzle_get_component(swizzle, 1); + uint32_t z = vsir_swizzle_get_component(swizzle, 2); + uint32_t w = vsir_swizzle_get_component(swizzle, 3); - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, - .src_count = 1, - }; + if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) + ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &instr); + return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) + | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) + | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) + | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); } -static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_src_param *param, + struct sm1_src_register *src, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - struct hlsl_ctx *ctx = d3dbc->ctx; + src->mod = param->modifiers; + src->reg = param->reg.idx[0].offset; + src->type = param->reg.type; + src->swizzle = swizzle_from_vsir(param->swizzle); - /* Narrowing casts were already lowered. */ - VKD3D_ASSERT(src_type->dimx == dst_type->dimx); - - switch (dst_type->e.numeric.type) + if (param->reg.idx[0].rel_addr) { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - switch (src_type->e.numeric.type) - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - switch(src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not - * reach this case unless we are missing something. */ - hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); - break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast to double."); - break; - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ - default: - hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", - debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); - break; + vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on source register."); + d3dbc->failed = true; } } -static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) +static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param, + struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc) { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct hlsl_ctx *ctx = d3dbc->ctx; - unsigned int i, x; + dst->mod = param->modifiers; + dst->reg = param->reg.idx[0].offset; + dst->type = param->reg.type; + dst->writemask = param->write_mask; - for (i = 0; i < ctx->constant_defs.count; ++i) + if (param->reg.idx[0].rel_addr) { - const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = constant_reg->index, - }; - - if (version->major > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - write_sm1_dst_register(buffer, ®); - for (x = 0; x < 4; ++x) - put_f32(buffer, constant_reg->value.f[x]); + vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on destination register."); + d3dbc->failed = true; } } -static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, - const struct signature_element *element, bool output) +static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; + uint32_t token; - if (hlsl_sm1_register_from_semantic(version, element->semantic_name, - element->semantic_index, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else + const struct sm1_dst_register reg = { - ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); - VKD3D_ASSERT(ret); - reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - reg.reg = element->register_index; - } + .type = VKD3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = ins->dst[0].reg.idx[0].offset, + }; - token = D3DSIO_DCL; + token = VKD3D_SM1_OP_DEF; if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; + token |= 5 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); - token = (1u << 31); - token |= usage << D3DSP_DCL_USAGE_SHIFT; - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - - reg.writemask = element->mask; write_sm1_dst_register(buffer, ®); + for (unsigned int x = 0; x < 4; ++x) + put_f32(buffer, ins->src[0].reg.u.immconst_f32[x]); } -static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) -{ - struct vsir_program *program = d3dbc->program; - const struct vkd3d_shader_version *version; - bool write_in = false, write_out = false; - - version = &program->shader_version; - if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) - write_in = true; - else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) - write_in = write_out = true; - else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) - write_in = true; - - if (write_in) - { - for (unsigned int i = 0; i < program->input_signature.element_count; ++i) - d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); - } - - if (write_out) - { - for (unsigned int i = 0; i < program->output_signature.element_count; ++i) - d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); - } -} - -static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, - unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, + unsigned int reg_id, enum vkd3d_sm1_resource_type res_type) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; struct sm1_dst_register reg = {0}; - uint32_t token, res_type = 0; + uint32_t token; - token = D3DSIO_DCL; + token = VKD3D_SM1_OP_DCL; if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; + token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); - switch (sampler_dim) - { - case HLSL_SAMPLER_DIM_2D: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; - break; - - case HLSL_SAMPLER_DIM_CUBE: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; - break; - - case HLSL_SAMPLER_DIM_3D: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; - break; - - default: - vkd3d_unreachable(); - break; - } - - token = (1u << 31); + token = VKD3D_SM1_INSTRUCTION_PARAMETER; token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); @@ -2305,618 +2087,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, write_sm1_dst_register(buffer, ®); } -static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) +static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ctx *ctx = d3dbc->ctx; - enum hlsl_sampler_dim sampler_dim; - unsigned int i, count, reg_id; - struct hlsl_ir_var *var; + const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + unsigned int reg_id; if (version->major < 2) return; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) - continue; - - count = var->bind_count[HLSL_REGSET_SAMPLERS]; + reg_id = semantic->resource.reg.reg.idx[0].offset; - for (i = 0; i < count; ++i) - { - if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - { - sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; - if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - /* These can appear in sm4-style combined sample instructions. */ - hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); - continue; - } - - reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; - d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); - } - } - } -} - -static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = + if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER) { - .opcode = D3DSIO_MOV, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - - VKD3D_ASSERT(instr->reg.allocated); - VKD3D_ASSERT(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); -} - -static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - unsigned int i; - - for (i = 0; i < instr->data_type->dimx; ++i) - { - struct hlsl_reg src = arg1->reg, dst = instr->reg; - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); - } -} - -static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, - const struct hlsl_reg *dst, const struct hlsl_reg *src) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_SINCOS, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .src_count = 1, - }; - - if (op == HLSL_OP1_COS_REDUCED) - VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); - else /* HLSL_OP1_SIN_REDUCED */ - VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); - - if (d3dbc->ctx->profile->major_version < 3) - { - instr.src_count = 3; - - instr.srcs[1].type = VKD3DSPR_CONST; - instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); - instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; - - instr.srcs[2].type = VKD3DSPR_CONST; - instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); - instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; - } - - d3dbc_write_instruction(d3dbc, &instr); -} - -static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; - struct hlsl_ctx *ctx = d3dbc->ctx; - - VKD3D_ASSERT(instr->reg.allocated); - - if (expr->op == HLSL_OP1_REINTERPRET) - { - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - return; - } - - if (expr->op == HLSL_OP1_CAST) - { - d3dbc_write_cast(d3dbc, instr); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE, + "dcl instruction with register type %u.", semantic->resource.reg.reg.type); + d3dbc->failed = true; return; } - if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) + switch (semantic->resource_type) { - /* These need to be lowered. */ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); - return; - } - - switch (expr->op) - { - case HLSL_OP1_ABS: - d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSX: - d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSY: - d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_LOG2: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); - break; - - case HLSL_OP1_NEG: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); - break; - - case HLSL_OP1_COS_REDUCED: - case HLSL_OP1_SIN_REDUCED: - d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); - break; - - case HLSL_OP2_ADD: - d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: - d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: - d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: - d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: - d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D); break; - case HLSL_OP2_LOGIC_AND: - d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE); break; - case HLSL_OP2_LOGIC_OR: - d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_SLT: - if (version->type == VKD3D_SHADER_TYPE_PIXEL) - hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); - d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP3_CMP: - if (version->type == VKD3D_SHADER_TYPE_VERTEX) - hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); - d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_DP2ADD: - d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_MAD: - d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_3D); break; default: - hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); - break; + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE, + "dcl instruction with resource_type %u.", semantic->resource_type); + d3dbc->failed = true; + return; } } -static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); - -static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( + struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { - const struct hlsl_ir_if *iff = hlsl_ir_if(instr); - const struct hlsl_ir_node *condition; - struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; - - condition = iff->condition.node; - VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); - - sm1_ifc = (struct sm1_instruction) - { - .opcode = D3DSIO_IFC, - .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[0].reg = condition->reg.id, - .srcs[0].mod = 0, - - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[1].reg = condition->reg.id, - .srcs[1].mod = D3DSPSM_NEG, - - .src_count = 2, - }; - d3dbc_write_instruction(d3dbc, &sm1_ifc); - d3dbc_write_block(d3dbc, &iff->then_block); + const struct vkd3d_sm1_opcode_info *info; - if (!list_empty(&iff->else_block.instrs)) + if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) { - sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; - d3dbc_write_instruction(d3dbc, &sm1_else); - d3dbc_write_block(d3dbc, &iff->else_block); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "Opcode %#x not supported for shader profile.", ins->opcode); + d3dbc->failed = true; + return NULL; } - sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; - d3dbc_write_instruction(d3dbc, &sm1_endif); -} - -static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - - switch (jump->type) + if (ins->dst_count != info->dst_count) { - case HLSL_IR_JUMP_DISCARD_NEG: - { - struct hlsl_reg *reg = &jump->condition.node->reg; - - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_TEXKILL, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg->id, - .dst.writemask = reg->writemask, - .has_dst = 1, - }; - - d3dbc_write_instruction(d3dbc, &sm1_instr); - break; - } - - default: - hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid destination count %u for vsir instruction %#x (expected %u).", + ins->dst_count, ins->opcode, info->dst_count); + d3dbc->failed = true; + return NULL; } -} - -static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_load *load = hlsl_ir_load(instr); - struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - - VKD3D_ASSERT(instr->reg.allocated); - - if (load->src.var->is_uniform) + if (ins->src_count != info->src_count) { - VKD3D_ASSERT(reg.allocated); - sm1_instr.srcs[0].type = VKD3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { - if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, - load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - VKD3D_ASSERT(reg.allocated); - sm1_instr.srcs[0].type = VKD3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else - sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid source count %u for vsir instruction %#x (expected %u).", + ins->src_count, ins->opcode, info->src_count); + d3dbc->failed = true; + return NULL; } - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); + return info; } -static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc, + const struct vkd3d_shader_instruction *ins) { - const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - struct hlsl_ir_node *coords = load->coords.node; - struct hlsl_ir_node *ddx = load->ddx.node; - struct hlsl_ir_node *ddy = load->ddy.node; - unsigned int sampler_offset, reg_id; - struct hlsl_ctx *ctx = d3dbc->ctx; - struct sm1_instruction sm1_instr; + struct sm1_instruction instr = {0}; + const struct vkd3d_sm1_opcode_info *info; - sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); - reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; + if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) + return; - sm1_instr = (struct sm1_instruction) - { - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, + instr.opcode = info->sm1_opcode; + instr.flags = ins->flags; + instr.has_dst = info->dst_count; + instr.src_count = info->src_count; - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = coords->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), + if (instr.has_dst) + sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location); + for (unsigned int i = 0; i < instr.src_count; ++i) + sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location); - .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, - .srcs[1].reg = reg_id, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + d3dbc_write_instruction(d3dbc, &instr); +} - .src_count = 2, - }; +static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) +{ + uint32_t writemask; - switch (load->load_type) + switch (ins->opcode) { - case HLSL_RESOURCE_SAMPLE: - sm1_instr.opcode = D3DSIO_TEX; + case VKD3DSIH_DEF: + d3dbc_write_vsir_def(d3dbc, ins); break; - case HLSL_RESOURCE_SAMPLE_PROJ: - sm1_instr.opcode = D3DSIO_TEX; - sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + case VKD3DSIH_DCL: + d3dbc_write_vsir_dcl(d3dbc, ins); break; - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - sm1_instr.opcode = D3DSIO_TEX; - sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + case VKD3DSIH_ABS: + case VKD3DSIH_ADD: + case VKD3DSIH_CMP: + case VKD3DSIH_DP2ADD: + case VKD3DSIH_DP3: + case VKD3DSIH_DP4: + case VKD3DSIH_DSX: + case VKD3DSIH_DSY: + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_FRC: + case VKD3DSIH_IFC: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_MOV: + case VKD3DSIH_MUL: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SLT: + case VKD3DSIH_TEX: + case VKD3DSIH_TEXKILL: + case VKD3DSIH_TEXLDD: + d3dbc_write_vsir_simple_instruction(d3dbc, ins); break; - case HLSL_RESOURCE_SAMPLE_GRAD: - sm1_instr.opcode = D3DSIO_TEXLDD; - - sm1_instr.srcs[2].type = VKD3DSPR_TEMP; - sm1_instr.srcs[2].reg = ddx->reg.id; - sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); - - sm1_instr.srcs[3].type = VKD3DSPR_TEMP; - sm1_instr.srcs[3].reg = ddy->reg.id; - sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); - - sm1_instr.src_count += 2; + case VKD3DSIH_EXP: + case VKD3DSIH_LOG: + case VKD3DSIH_RCP: + case VKD3DSIH_RSQ: + writemask = ins->dst->write_mask; + if (writemask != VKD3DSP_WRITEMASK_0 && writemask != VKD3DSP_WRITEMASK_1 + && writemask != VKD3DSP_WRITEMASK_2 && writemask != VKD3DSP_WRITEMASK_3) + { + vkd3d_shader_error(d3dbc->message_context, &ins->location, + VKD3D_SHADER_ERROR_D3DBC_INVALID_WRITEMASK, + "writemask %#x for vsir instruction with opcode %#x is not single component.", + writemask, ins->opcode); + d3dbc->failed = true; + } + d3dbc_write_vsir_simple_instruction(d3dbc, ins); break; default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "vsir instruction with opcode %#x.", ins->opcode); + d3dbc->failed = true; + break; } - - VKD3D_ASSERT(instr->reg.allocated); - - d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + const struct signature_element *element, bool output) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct hlsl_ir_store *store = hlsl_ir_store(instr); - struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + enum vkd3d_decl_usage usage; + uint32_t token, usage_idx; + bool ret; - if (store->lhs.var->is_output_semantic) + if (hlsl_sm1_register_from_semantic(version, element->semantic_name, + element->semantic_index, output, ®.type, ®.reg)) { - if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) - { - sm1_instr.dst.type = VKD3DSPR_TEMP; - sm1_instr.dst.reg = 0; - } - else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, - store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - VKD3D_ASSERT(reg.allocated); - sm1_instr.dst.type = VKD3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + usage = 0; + usage_idx = 0; } else - VKD3D_ASSERT(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); -} - -static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; - struct sm1_instruction sm1_instr = { - .opcode = D3DSIO_MOV, + ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg = element->register_index; + } - .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, + token = VKD3D_SM1_OP_DCL; + if (version->major > 1) + token |= 2 << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + put_u32(buffer, token); - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; + token = (1u << 31); + token |= usage << VKD3D_SM1_DCL_USAGE_SHIFT; + token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; + put_u32(buffer, token); - VKD3D_ASSERT(instr->reg.allocated); - VKD3D_ASSERT(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); + reg.writemask = element->mask; + write_sm1_dst_register(buffer, ®); } -static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) +static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) { - struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - d3dbc_write_constant(d3dbc, instr); - break; - - case HLSL_IR_EXPR: - d3dbc_write_expr(d3dbc, instr); - break; - - case HLSL_IR_IF: - if (hlsl_version_ge(ctx, 2, 1)) - d3dbc_write_if(d3dbc, instr); - else - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - break; - - case HLSL_IR_JUMP: - d3dbc_write_jump(d3dbc, instr); - break; + struct vsir_program *program = d3dbc->program; + const struct vkd3d_shader_version *version; + bool write_in = false, write_out = false; - case HLSL_IR_LOAD: - d3dbc_write_load(d3dbc, instr); - break; + version = &program->shader_version; + if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) + write_in = true; + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) + write_in = write_out = true; + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) + write_in = true; - case HLSL_IR_RESOURCE_LOAD: - d3dbc_write_resource_load(d3dbc, instr); - break; + if (write_in) + { + for (unsigned int i = 0; i < program->input_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); + } - case HLSL_IR_STORE: - d3dbc_write_store(d3dbc, instr); - break; + if (write_out) + { + for (unsigned int i = 0; i < program->output_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); + } +} - case HLSL_IR_SWIZZLE: - d3dbc_write_swizzle(d3dbc, instr); - break; +static void d3dbc_write_program_instructions(struct d3dbc_compiler *d3dbc) +{ + struct vsir_program *program = d3dbc->program; + unsigned int i; - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } + for (i = 0; i < program->instructions.count; ++i) + d3dbc_write_vsir_instruction(d3dbc, &program->instructions.elements[i]); } -/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving - * data from the other parameters instead, so it can be removed as an argument - * and be declared in vkd3d_shader_private.h and used without relying on HLSL - * IR structs. */ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_version *version = &program->shader_version; struct d3dbc_compiler d3dbc = {0}; struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; + int result; - d3dbc.ctx = ctx; d3dbc.program = program; d3dbc.message_context = message_context; + switch (version->type) + { + case VKD3D_SHADER_TYPE_VERTEX: + d3dbc.opcode_table = vs_opcode_table; + break; + + case VKD3D_SHADER_TYPE_PIXEL: + d3dbc.opcode_table = ps_opcode_table; + break; + + default: + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE, + "Invalid shader type %u.", version->type); + return VKD3D_ERROR_INVALID_SHADER; + } put_u32(buffer, sm1_version(version->type, version->major, version->minor)); bytecode_put_bytes(buffer, ctab->code, ctab->size); - d3dbc_write_constant_defs(&d3dbc); d3dbc_write_semantic_dcls(&d3dbc); - d3dbc_write_sampler_dcls(&d3dbc); - d3dbc_write_block(&d3dbc, &entry_func->body); + d3dbc_write_program_instructions(&d3dbc); - put_u32(buffer, D3DSIO_END); + put_u32(buffer, VKD3D_SM1_OP_END); + result = VKD3D_OK; if (buffer->status) - ctx->result = buffer->status; + result = buffer->status; + if (d3dbc.failed) + result = VKD3D_ERROR_INVALID_SHADER; - if (!ctx->result) + if (!result) { out->code = buffer->data; out->size = buffer->size; @@ -2925,5 +2372,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, { vkd3d_free(buffer->data); } - return ctx->result; + return result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 184788dc57e..93fc993e0d1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -20,6 +20,19 @@ #include "vkd3d_shader_private.h" +#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 + +static void compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) +{ + const uint8_t *ptr = dxbc; + + VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); + ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; + size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; + + vkd3d_compute_md5(ptr, size, checksum, VKD3D_MD5_DXBC); +} + void dxbc_writer_init(struct dxbc_writer *dxbc) { memset(dxbc, 0, sizeof(*dxbc)); @@ -72,7 +85,7 @@ int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_ } set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + compute_dxbc_checksum(buffer.data, buffer.size, checksum); for (i = 0; i < 4; ++i) set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); @@ -188,7 +201,7 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ checksum[3] = read_u32(&ptr); if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) { - vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); + compute_dxbc_checksum(data, data_size, calculated_checksum); if (memcmp(checksum, calculated_checksum, sizeof(checksum))) { WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " @@ -1488,7 +1501,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro dxbc->code = context.buffer.data; dxbc->size = total_size; - vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); + compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); for (i = 0; i < 4; ++i) set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 4a17c62292b..ee78b6251f9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3888,7 +3888,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade if (is_control_point) { if (reg_type == VKD3DSPR_OUTPUT) - param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions); + param->reg.idx[count].rel_addr = vsir_program_create_outpointid_param(sm6->p.program); param->reg.idx[count++].offset = 0; } @@ -10303,7 +10303,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. */ count = max(token_count, 400) - 400; - if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) + if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index a1d1fd6572f..1314bc09e73 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -38,6 +38,7 @@ struct type_entry struct list entry; const char *name; uint32_t elements_count; + uint32_t modifiers; uint32_t offset; }; @@ -278,9 +279,9 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { + unsigned int elements_count, modifiers; const struct hlsl_type *element_type; struct type_entry *type_entry; - unsigned int elements_count; const char *name; VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); @@ -297,6 +298,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context } name = get_fx_4_type_name(element_type); + modifiers = element_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK; LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) { @@ -306,6 +308,9 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context if (type_entry->elements_count != elements_count) continue; + if (type_entry->modifiers != modifiers) + continue; + return type_entry->offset; } @@ -315,6 +320,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context type_entry->offset = write_fx_4_type(type, fx); type_entry->name = name; type_entry->elements_count = elements_count; + type_entry->modifiers = modifiers; list_add_tail(&fx->types, &type_entry->entry); @@ -564,6 +570,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_VERTEX_SHADER: return "VertexShader"; + case HLSL_CLASS_GEOMETRY_SHADER: + return "GeometryShader"; + case HLSL_CLASS_PIXEL_SHADER: return "PixelShader"; @@ -575,6 +584,12 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) } } +static bool is_numeric_fx_4_type(const struct hlsl_type *type) +{ + type = hlsl_get_multiarray_element_type(type); + return type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type); +} + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) { struct field_offsets @@ -584,43 +599,41 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co uint32_t offset; uint32_t type; }; - uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; + uint32_t name_offset, offset, unpacked_size, packed_size, stride, numeric_desc; struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; struct field_offsets *field_offsets = NULL; + const struct hlsl_type *element_type; struct hlsl_ctx *ctx = fx->ctx; uint32_t elements_count = 0; const char *name; size_t i; - /* Resolve arrays to element type and number of elements. */ if (type->class == HLSL_CLASS_ARRAY) - { elements_count = hlsl_get_multiarray_size(type); - type = hlsl_get_multiarray_element_type(type); - } + element_type = hlsl_get_multiarray_element_type(type); - name = get_fx_4_type_name(type); + name = get_fx_4_type_name(element_type); name_offset = write_string(name, fx); - if (type->class == HLSL_CLASS_STRUCT) + if (element_type->class == HLSL_CLASS_STRUCT) { - if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) + if (!(field_offsets = hlsl_calloc(ctx, element_type->e.record.field_count, sizeof(*field_offsets)))) return 0; - for (i = 0; i < type->e.record.field_count; ++i) + for (i = 0; i < element_type->e.record.field_count; ++i) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; + const struct hlsl_struct_field *field = &element_type->e.record.fields[i]; field_offsets[i].name = write_string(field->name, fx); field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); - field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; + field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float); field_offsets[i].type = write_type(field->type, fx); } } offset = put_u32_unaligned(buffer, name_offset); - switch (type->class) + switch (element_type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -659,32 +672,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co vkd3d_unreachable(); case HLSL_CLASS_VOID: - FIXME("Writing type class %u is not implemented.\n", type->class); + FIXME("Writing type class %u is not implemented.\n", element_type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); return 0; } /* Structures can only contain numeric fields, this is validated during variable declaration. */ - total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + unpacked_size = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + packed_size = 0; - if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) - packed_size = hlsl_type_component_count(type) * sizeof(float); + if (is_numeric_fx_4_type(element_type)) + packed_size = hlsl_type_component_count(element_type) * sizeof(float); if (elements_count) - { - total_size *= elements_count; packed_size *= elements_count; - } + + stride = element_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); stride = align(stride, 4 * sizeof(float)); put_u32_unaligned(buffer, elements_count); - put_u32_unaligned(buffer, total_size); + put_u32_unaligned(buffer, unpacked_size); put_u32_unaligned(buffer, stride); put_u32_unaligned(buffer, packed_size); - if (type->class == HLSL_CLASS_STRUCT) + if (element_type->class == HLSL_CLASS_STRUCT) { - put_u32_unaligned(buffer, type->e.record.field_count); - for (i = 0; i < type->e.record.field_count; ++i) + put_u32_unaligned(buffer, element_type->e.record.field_count); + for (i = 0; i < element_type->e.record.field_count; ++i) { const struct field_offsets *field = &field_offsets[i]; @@ -700,7 +713,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, 0); /* Interface count */ } } - else if (type->class == HLSL_CLASS_TEXTURE) + else if (element_type->class == HLSL_CLASS_TEXTURE) { static const uint32_t texture_type[] = { @@ -716,13 +729,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, }; - put_u32_unaligned(buffer, texture_type[type->sampler_dim]); + put_u32_unaligned(buffer, texture_type[element_type->sampler_dim]); } - else if (type->class == HLSL_CLASS_SAMPLER) + else if (element_type->class == HLSL_CLASS_SAMPLER) { put_u32_unaligned(buffer, 21); } - else if (type->class == HLSL_CLASS_UAV) + else if (element_type->class == HLSL_CLASS_UAV) { static const uint32_t uav_type[] = { @@ -735,60 +748,60 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, }; - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); + put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); } - else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) + else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) { put_u32_unaligned(buffer, 20); } - else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) + else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) { put_u32_unaligned(buffer, 19); } - else if (type->class == HLSL_CLASS_PIXEL_SHADER) + else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) { put_u32_unaligned(buffer, 5); } - else if (type->class == HLSL_CLASS_VERTEX_SHADER) + else if (element_type->class == HLSL_CLASS_VERTEX_SHADER) { put_u32_unaligned(buffer, 6); } - else if (type->class == HLSL_CLASS_RASTERIZER_STATE) + else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) { put_u32_unaligned(buffer, 4); } - else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) + else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) { put_u32_unaligned(buffer, 3); } - else if (type->class == HLSL_CLASS_BLEND_STATE) + else if (element_type->class == HLSL_CLASS_BLEND_STATE) { put_u32_unaligned(buffer, 2); } - else if (type->class == HLSL_CLASS_STRING) + else if (element_type->class == HLSL_CLASS_STRING) { put_u32_unaligned(buffer, 1); } - else if (hlsl_is_numeric_type(type)) + else if (hlsl_is_numeric_type(element_type)) { - numeric_desc = get_fx_4_numeric_type_description(type, fx); + numeric_desc = get_fx_4_numeric_type_description(element_type, fx); put_u32_unaligned(buffer, numeric_desc); } - else if (type->class == HLSL_CLASS_COMPUTE_SHADER) + else if (element_type->class == HLSL_CLASS_COMPUTE_SHADER) { put_u32_unaligned(buffer, 28); } - else if (type->class == HLSL_CLASS_HULL_SHADER) + else if (element_type->class == HLSL_CLASS_HULL_SHADER) { put_u32_unaligned(buffer, 29); } - else if (type->class == HLSL_CLASS_DOMAIN_SHADER) + else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER) { put_u32_unaligned(buffer, 30); } else { - FIXME("Type %u is not supported.\n", type->class); + FIXME("Type %u is not supported.\n", element_type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); } @@ -2126,7 +2139,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, } /* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState - object, and only when fx_5_0 profile is used. */ + object, and only when fx_4_1 or fx_5_0 profile is used. */ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, unsigned int entry_index, struct fx_write_context *fx) { @@ -2140,7 +2153,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * if (type->class != HLSL_CLASS_BLEND_STATE) return 1; - if (ctx->profile->major_version != 5) + if (hlsl_version_lt(ctx, 4, 1)) return 1; if (entry->lhs_has_index) return 1; @@ -2401,6 +2414,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx size = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { + if (!is_numeric_fx_4_type(var->data_type)) + continue; + if (var->buffer != b) continue; diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index d1f02ab568b..dd1c121d5a8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -38,7 +38,20 @@ struct vkd3d_glsl_generator struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; unsigned int indent; + const char *prefix; bool failed; + + struct shader_limits + { + unsigned int input_count; + unsigned int output_count; + } limits; + bool interstage_input; + bool interstage_output; + + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; }; static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( @@ -53,6 +66,27 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( generator->failed = true; } +static const char *shader_glsl_get_prefix(enum vkd3d_shader_type type) +{ + switch (type) + { + case VKD3D_SHADER_TYPE_VERTEX: + return "vs"; + case VKD3D_SHADER_TYPE_HULL: + return "hs"; + case VKD3D_SHADER_TYPE_DOMAIN: + return "ds"; + case VKD3D_SHADER_TYPE_GEOMETRY: + return "gs"; + case VKD3D_SHADER_TYPE_PIXEL: + return "ps"; + case VKD3D_SHADER_TYPE_COMPUTE: + return "cs"; + default: + return NULL; + } +} + static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) { vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); @@ -67,6 +101,82 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); break; + case VKD3DSPR_INPUT: + if (reg->idx_count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled input register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + if (reg->idx[0].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled input register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, reg->idx[0].offset); + break; + + case VKD3DSPR_OUTPUT: + if (reg->idx_count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + if (reg->idx[0].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "%s_out[%u]", gen->prefix, reg->idx[0].offset); + break; + + case VKD3DSPR_IMMCONST: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + + case VSIR_DIMENSION_VEC4: + vkd3d_string_buffer_printf(buffer, "uvec4(%#xu, %#xu, %#xu, %#xu)", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + + default: + vkd3d_string_buffer_printf(buffer, "", reg->dimension); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + break; + } + break; + + case VKD3DSPR_CONSTBUFFER: + if (reg->idx_count != 3) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled constant buffer register indirect addressing."); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } + vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]", + gen->prefix, reg->idx[0].offset, reg->idx[2].offset); + break; + default: vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); @@ -106,23 +216,109 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca vkd3d_string_buffer_release(cache, src->str); } +static void shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vkd3d_glsl_generator *gen, + const char *src, enum vkd3d_data_type dst_data_type, enum vkd3d_data_type src_data_type, unsigned int size) +{ + if (dst_data_type == VKD3D_DATA_UNORM || dst_data_type == VKD3D_DATA_SNORM) + dst_data_type = VKD3D_DATA_FLOAT; + if (src_data_type == VKD3D_DATA_UNORM || src_data_type == VKD3D_DATA_SNORM) + src_data_type = VKD3D_DATA_FLOAT; + + if (dst_data_type == src_data_type) + { + vkd3d_string_buffer_printf(dst, "%s", src); + return; + } + + if (src_data_type == VKD3D_DATA_FLOAT) + { + switch (dst_data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(dst, "floatBitsToInt(%s)", src); + return; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(dst, "floatBitsToUint(%s)", src); + return; + default: + break; + } + } + + if (src_data_type == VKD3D_DATA_UINT) + { + switch (dst_data_type) + { + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(dst, "uintBitsToFloat(%s)", src); + return; + case VKD3D_DATA_INT: + if (size == 1) + vkd3d_string_buffer_printf(dst, "int(%s)", src); + else + vkd3d_string_buffer_printf(dst, "ivec%u(%s)", size, src); + return; + default: + break; + } + } + + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled bitcast from %#x to %#x.", + src_data_type, dst_data_type); + vkd3d_string_buffer_printf(dst, "%s", src); +} + static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) { const struct vkd3d_shader_register *reg = &vsir_src->reg; + struct vkd3d_string_buffer *register_name, *str; + enum vkd3d_data_type src_data_type; + unsigned int size; glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + register_name = vkd3d_string_buffer_get(&gen->string_buffers); if (reg->non_uniform) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled 'non-uniform' modifier."); - if (vsir_src->modifiers) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - shader_glsl_print_register_name(glsl_src->str, gen, reg); + if (reg->type == VKD3DSPR_IMMCONST) + src_data_type = VKD3D_DATA_UINT; + else + src_data_type = VKD3D_DATA_FLOAT; + + shader_glsl_print_register_name(register_name, gen, reg); + + if (!vsir_src->modifiers) + str = glsl_src->str; + else + str = vkd3d_string_buffer_get(&gen->string_buffers); + + size = reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1; + shader_glsl_print_bitcast(str, gen, register_name->buffer, reg->data_type, src_data_type, size); if (reg->dimension == VSIR_DIMENSION_VEC4) - shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); + shader_glsl_print_swizzle(str, vsir_src->swizzle, mask); + + switch (vsir_src->modifiers) + { + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_ABS: + vkd3d_string_buffer_printf(glsl_src->str, "abs(%s)", str->buffer); + break; + default: + vkd3d_string_buffer_printf(glsl_src->str, "(%s)", + vsir_src->modifiers, str->buffer); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + break; + } + + if (str != glsl_src->str) + vkd3d_string_buffer_release(&gen->string_buffers, str); + vkd3d_string_buffer_release(&gen->string_buffers, register_name); } static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) @@ -156,6 +352,9 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) { + const struct vkd3d_shader_register *dst_reg = &dst->vsir->reg; + struct vkd3d_string_buffer *buffer = gen->buffer; + bool close = true; va_list args; if (dst->vsir->shift) @@ -165,14 +364,28 @@ static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); - shader_glsl_print_indent(gen->buffer, gen->indent); - vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + shader_glsl_print_indent(buffer, gen->indent); + vkd3d_string_buffer_printf(buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + + switch (dst_reg->data_type) + { + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled destination register data type %#x.", dst_reg->data_type); + /* fall through */ + case VKD3D_DATA_FLOAT: + close = false; + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "uintBitsToFloat("); + break; + } va_start(args, format); - vkd3d_string_buffer_vprintf(gen->buffer, format, args); + vkd3d_string_buffer_vprintf(buffer, format, args); va_end(args); - vkd3d_string_buffer_printf(gen->buffer, ";\n"); + vkd3d_string_buffer_printf(buffer, "%s;\n", close ? ")" : ""); } static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) @@ -183,6 +396,48 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } +static void shader_glsl_binop(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); + + shader_glsl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); + + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_relop(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *scalar_op, const char *vector_op) +{ + unsigned int mask_size; + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + shader_glsl_print_assignment(gen, &dst, "uvec%u(%s(%s, %s)) * 0xffffffffu", + mask_size, vector_op, src[0].str->buffer, src[1].str->buffer); + else + shader_glsl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", + src[0].str->buffer, scalar_op, src[1].str->buffer); + + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { struct glsl_src src; @@ -198,16 +453,138 @@ static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d glsl_dst_cleanup(&dst, &gen->string_buffers); } +static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_sysval_semantic sysval, unsigned int idx) +{ + const struct vkd3d_shader_version *version = &gen->program->shader_version; + + switch (sysval) + { + case VKD3D_SHADER_SV_POSITION: + if (version->type == VKD3D_SHADER_TYPE_PIXEL || version->type == VKD3D_SHADER_TYPE_COMPUTE) + { + vkd3d_string_buffer_printf(buffer, "", sysval); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", sysval); + } + else + { + vkd3d_string_buffer_printf(buffer, "gl_Position"); + if (idx) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_POSITION index %u.", idx); + } + break; + + case VKD3D_SHADER_SV_TARGET: + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_TARGET in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, "shader_out_%u", idx); + break; + + default: + vkd3d_string_buffer_printf(buffer, "", sysval); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", sysval); + break; + } +} + +static void shader_glsl_shader_prologue(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->input_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + shader_glsl_print_indent(buffer, gen->indent); + vkd3d_string_buffer_printf(buffer, "%s_in[%u]", gen->prefix, e->register_index); + shader_glsl_print_write_mask(buffer, e->mask); + if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + if (gen->interstage_input) + { + vkd3d_string_buffer_printf(buffer, " = shader_in.reg_%u", e->target_location); + if (e->target_location >= gen->limits.input_count) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Input element %u specifies target location %u, " + "but only %u inputs are supported.", + i, e->target_location, gen->limits.input_count); + } + else + { + vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); + } + } + else + { + vkd3d_string_buffer_printf(buffer, " = "); + shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); + } + shader_glsl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + +static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + shader_glsl_print_indent(buffer, gen->indent); + if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + if (gen->interstage_output) + { + vkd3d_string_buffer_printf(buffer, "shader_out.reg_%u", e->target_location); + if (e->target_location >= gen->limits.output_count) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Output element %u specifies target location %u, " + "but only %u outputs are supported.", + i, e->target_location, gen->limits.output_count); + } + else + { + vkd3d_string_buffer_printf(buffer, "", e->target_location); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output."); + } + } + else + { + shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); + } + shader_glsl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); + shader_glsl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &gen->program->shader_version; - /* - * TODO: Implement in_subroutine - * TODO: shader_glsl_generate_shader_epilogue(generator); - */ if (version->major >= 4) { + shader_glsl_shader_epilogue(gen); shader_glsl_print_indent(gen->buffer, gen->indent); vkd3d_string_buffer_printf(gen->buffer, "return;\n"); } @@ -220,14 +597,30 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, switch (ins->opcode) { + case VKD3DSIH_ADD: + shader_glsl_binop(gen, ins, "+"); + break; + case VKD3DSIH_AND: + shader_glsl_binop(gen, ins, "&"); + break; case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_NOP: break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: + shader_glsl_relop(gen, ins, "!=", "notEqual"); + break; case VKD3DSIH_MOV: shader_glsl_mov(gen, ins); break; + case VKD3DSIH_MUL: + shader_glsl_binop(gen, ins, "*"); + break; + case VKD3DSIH_OR: + shader_glsl_binop(gen, ins, "|"); + break; case VKD3DSIH_RET: shader_glsl_ret(gen, ins); break; @@ -237,13 +630,290 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, } } +static bool shader_glsl_check_shader_visibility(const struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_visibility visibility) +{ + enum vkd3d_shader_type t = gen->program->shader_version.type; + + switch (visibility) + { + case VKD3D_SHADER_VISIBILITY_ALL: + return true; + case VKD3D_SHADER_VISIBILITY_VERTEX: + return t == VKD3D_SHADER_TYPE_VERTEX; + case VKD3D_SHADER_VISIBILITY_HULL: + return t == VKD3D_SHADER_TYPE_HULL; + case VKD3D_SHADER_VISIBILITY_DOMAIN: + return t == VKD3D_SHADER_TYPE_DOMAIN; + case VKD3D_SHADER_VISIBILITY_GEOMETRY: + return t == VKD3D_SHADER_TYPE_GEOMETRY; + case VKD3D_SHADER_VISIBILITY_PIXEL: + return t == VKD3D_SHADER_TYPE_PIXEL; + case VKD3D_SHADER_VISIBILITY_COMPUTE: + return t == VKD3D_SHADER_TYPE_COMPUTE; + default: + WARN("Invalid shader visibility %#x.\n", visibility); + return false; + } +} + +static bool shader_glsl_get_cbv_binding(const struct vkd3d_glsl_generator *gen, + unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) +{ + const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; + const struct vkd3d_shader_resource_binding *binding; + unsigned int i; + + if (!interface_info) + return false; + + for (i = 0; i < interface_info->binding_count; ++i) + { + binding = &interface_info->bindings[i]; + + if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) + continue; + if (binding->register_space != register_space) + continue; + if (binding->register_index != register_idx) + continue; + if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) + continue; + if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) + continue; + *binding_idx = i; + return true; + } + + return false; +} + +static void shader_glsl_generate_cbv_declaration(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_descriptor_info1 *cbv) +{ + const struct vkd3d_shader_descriptor_binding *binding; + const struct vkd3d_shader_descriptor_offset *offset; + struct vkd3d_string_buffer *buffer = gen->buffer; + const char *prefix = gen->prefix; + unsigned int binding_idx; + size_t size; + + if (cbv->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); + return; + } + + if (!shader_glsl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "No descriptor binding specified for constant buffer %u.", cbv->register_id); + return; + } + + binding = &gen->interface_info->bindings[binding_idx].binding; + + if (binding->set != 0) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); + return; + } + + if (binding->count != 1) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); + return; + } + + if (gen->offset_info && gen->offset_info->binding_offsets) + { + offset = &gen->offset_info->binding_offsets[binding_idx]; + if (offset->static_offset || offset->dynamic_offset_index != ~0u) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled descriptor offset specified for constant buffer %u.", + cbv->register_id); + return; + } + } + + size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); + size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); + + vkd3d_string_buffer_printf(buffer, + "layout(std140, binding = %u) uniform block_%s_cb_%u { vec4 %s_cb_%u[%zu]; };\n", + binding->binding, prefix, cbv->register_id, prefix, cbv->register_id, size); +} + +static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; + const struct vkd3d_shader_descriptor_info1 *descriptor; + unsigned int i; + + for (i = 0; i < info->descriptor_count; ++i) + { + descriptor = &info->descriptors[i]; + + switch (descriptor->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + shader_glsl_generate_cbv_declaration(gen, descriptor); + break; + + default: + vkd3d_string_buffer_printf(gen->buffer, "/* */\n", descriptor->type); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); + break; + } + } + if (info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, "\n"); +} + +static void shader_glsl_generate_interface_block(struct vkd3d_string_buffer *buffer, + const char *type, unsigned int count) +{ + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "%s shader_in_out\n{\n", type); + for (i = 0; i < count; ++i) + { + vkd3d_string_buffer_printf(buffer, " vec4 reg_%u;\n", i); + } + vkd3d_string_buffer_printf(buffer, "} shader_%s;\n", type); +} + +static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->input_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + if (!gen->interstage_input) + { + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + if (e->sysval_semantic) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); + continue; + } + + if (e->component_type != VKD3D_SHADER_COMPONENT_FLOAT) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x.", e->component_type); + continue; + } + + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); + continue; + } + + if (e->interpolation_mode != VKD3DSIM_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + continue; + } + + vkd3d_string_buffer_printf(buffer, + "layout(location = %u) in vec4 shader_in_%u;\n", e->target_location, i); + } + } + else if (gen->limits.input_count) + { + shader_glsl_generate_interface_block(buffer, "in", gen->limits.input_count); + } + vkd3d_string_buffer_printf(buffer, "\n"); +} + +static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; + unsigned int i; + + if (!gen->interstage_output) + { + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + + if (e->sysval_semantic != VKD3D_SHADER_SV_TARGET) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); + continue; + } + + if (e->component_type != VKD3D_SHADER_COMPONENT_FLOAT) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled component type %#x.", e->component_type); + continue; + } + + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); + continue; + } + + if (e->interpolation_mode != VKD3DSIM_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + continue; + } + + vkd3d_string_buffer_printf(buffer, + "layout(location = %u) out vec4 shader_out_%u;\n", e->target_location, i); + } + } + else if (gen->limits.output_count) + { + shader_glsl_generate_interface_block(buffer, "out", gen->limits.output_count); + } + vkd3d_string_buffer_printf(buffer, "\n"); +} + static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) { const struct vsir_program *program = gen->program; struct vkd3d_string_buffer *buffer = gen->buffer; + shader_glsl_generate_descriptor_declarations(gen); + shader_glsl_generate_input_declarations(gen); + shader_glsl_generate_output_declarations(gen); + + if (gen->limits.input_count) + vkd3d_string_buffer_printf(buffer, "vec4 %s_in[%u];\n", gen->prefix, gen->limits.input_count); + if (gen->limits.output_count) + vkd3d_string_buffer_printf(buffer, "vec4 %s_out[%u];\n", gen->prefix, gen->limits.output_count); if (program->temp_count) - vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); + vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n", program->temp_count); + vkd3d_string_buffer_printf(buffer, "\n"); } static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) @@ -264,6 +934,7 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); ++gen->indent; + shader_glsl_shader_prologue(gen); for (i = 0; i < instructions->count; ++i) { vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); @@ -294,27 +965,73 @@ static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); } +static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_version *version) +{ + struct shader_limits *limits = &gen->limits; + + if (version->major < 4 || version->major >= 6) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader version %u.%u.", version->major, version->minor); + + switch (version->type) + { + case VKD3D_SHADER_TYPE_VERTEX: + limits->input_count = 32; + limits->output_count = 32; + break; + case VKD3D_SHADER_TYPE_PIXEL: + limits->input_count = 32; + limits->output_count = 8; + break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", version->type); + limits->input_count = 0; + limits->output_count = 0; + break; + } +} + static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - struct vsir_program *program, struct vkd3d_shader_message_context *message_context) + struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + struct vkd3d_shader_message_context *message_context) { + enum vkd3d_shader_type type = program->shader_version.type; + memset(gen, 0, sizeof(*gen)); gen->program = program; vkd3d_string_buffer_cache_init(&gen->string_buffers); gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); + gen->location.source_name = compile_info->source_name; gen->message_context = message_context; + if (!(gen->prefix = shader_glsl_get_prefix(type))) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", type); + gen->prefix = "unknown"; + } + shader_glsl_init_limits(gen, &program->shader_version); + gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX; + gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL; + + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); + gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); + gen->descriptor_info = descriptor_info; } int glsl_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context) + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { struct vkd3d_glsl_generator generator; int ret; - if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) return ret; - vkd3d_glsl_generator_init(&generator, program, message_context); + vkd3d_glsl_generator_init(&generator, program, compile_info, descriptor_info, message_context); ret = vkd3d_glsl_generator_generate(&generator, out); vkd3d_glsl_generator_cleanup(&generator); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index bd5baacd83d..6323260eab7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -254,6 +254,45 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) } } +bool hlsl_type_is_shader(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return hlsl_type_is_shader(type->e.array.type); + + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_VERTEX_SHADER: + return true; + + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_VOID: + case HLSL_CLASS_NULL: + return false; + } + return false; +} + /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or * resources, since for both their data types span across a single regset. */ static enum hlsl_regset type_get_regset(const struct hlsl_type *type) @@ -930,6 +969,7 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", + "geometryshader", "matrix", "pixelshader", "texture", @@ -1792,6 +1832,65 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned return &swizzle->node; } +struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, + const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, + struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_profile_info *profile_info = NULL; + struct hlsl_ir_compile *compile; + struct hlsl_type *type = NULL; + unsigned int i; + + switch (compile_type) + { + case HLSL_COMPILE_TYPE_COMPILE: + if (!(profile_info = hlsl_get_target_info(profile_name))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Unknown profile \"%s\".", profile_name); + return NULL; + } + + if (profile_info->type == VKD3D_SHADER_TYPE_PIXEL) + type = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); + else if (profile_info->type == VKD3D_SHADER_TYPE_VERTEX) + type = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); + + if (!type) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE, "Invalid profile \"%s\".", profile_name); + return NULL; + } + + break; + + case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: + type = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); + break; + } + + if (!(compile = hlsl_alloc(ctx, sizeof(*compile)))) + return NULL; + + init_node(&compile->node, HLSL_IR_COMPILE, type, loc); + + compile->compile_type = compile_type; + compile->profile = profile_info; + + hlsl_block_init(&compile->instrs); + hlsl_block_add_block(&compile->instrs, args_instrs); + + compile->args_count = args_count; + if (!(compile->args = hlsl_alloc(ctx, sizeof(*compile->args) * args_count))) + { + vkd3d_free(compile); + return NULL; + } + for (i = 0; i < compile->args_count; ++i) + hlsl_src_from_node(&compile->args[i], args[i]); + + return &compile->node; +} + struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc) { @@ -2142,6 +2241,44 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } +static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_compile *compile) +{ + const char *profile_name = NULL; + struct hlsl_ir_node **args; + struct hlsl_ir_node *node; + struct hlsl_block block; + unsigned int i; + + if (!(clone_block(ctx, &block, &compile->instrs, map))) + return NULL; + + if (!(args = hlsl_alloc(ctx, sizeof(*args) * compile->args_count))) + { + hlsl_block_cleanup(&block); + return NULL; + } + for (i = 0; i < compile->args_count; ++i) + { + args[i] = map_instr(map, compile->args[i].node); + VKD3D_ASSERT(args[i]); + } + + if (compile->profile) + profile_name = compile->profile->name; + + if (!(node = hlsl_new_compile(ctx, compile->compile_type, profile_name, + args, compile->args_count, &block, &compile->node.loc))) + { + hlsl_block_cleanup(&block); + vkd3d_free(args); + return NULL; + } + + vkd3d_free(args); + return node; +} + static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) { @@ -2284,6 +2421,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + case HLSL_IR_COMPILE: + return clone_compile(ctx, map, hlsl_ir_compile(instr)); + case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); } @@ -2698,6 +2838,8 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_STORE ] = "HLSL_IR_STORE", [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", }; @@ -3146,6 +3288,34 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); } +static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_compile *compile) +{ + unsigned int i; + + switch (compile->compile_type) + { + case HLSL_COMPILE_TYPE_COMPILE: + vkd3d_string_buffer_printf(buffer, "compile %s {\n", compile->profile->name); + break; + + case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: + vkd3d_string_buffer_printf(buffer, "ConstructGSWithSO {\n"); + break; + } + + dump_block(ctx, buffer, &compile->instrs); + + vkd3d_string_buffer_printf(buffer, " %10s } (", ""); + for (i = 0; i < compile->args_count; ++i) + { + dump_src(buffer, &compile->args[i]); + if (i + 1 < compile->args_count) + vkd3d_string_buffer_printf(buffer, ", "); + } + vkd3d_string_buffer_printf(buffer, ")"); +} + static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_stateblock_constant *constant) { @@ -3245,6 +3415,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break; + case HLSL_IR_COMPILE: + dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); + break; + case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; @@ -3308,8 +3482,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { struct hlsl_src *src, *next; - VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); - VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy); + VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimx == new->data_type->dimx); + VKD3D_ASSERT(old->data_type == new->data_type || old->data_type->dimy == new->data_type->dimy); LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) { @@ -3459,6 +3633,17 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); } +static void free_ir_compile(struct hlsl_ir_compile *compile) +{ + unsigned int i; + + for (i = 0; i < compile->args_count; ++i) + hlsl_src_remove(&compile->args[i]); + + hlsl_block_cleanup(&compile->instrs); + vkd3d_free(compile); +} + static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) { vkd3d_free(constant->name); @@ -3527,6 +3712,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_switch(hlsl_ir_switch(node)); break; + case HLSL_IR_COMPILE: + free_ir_compile(hlsl_ir_compile(node)); + break; + case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; @@ -4078,6 +4267,11 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil } } + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + ctx->output_control_point_count = UINT_MAX; + ctx->output_primitive = 0; + ctx->partitioning = 0; + return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 22e25b23988..20a96692a48 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ #include "vkd3d_shader_private.h" #include "wine/rbtree.h" -#include "d3dcommon.h" #include "d3dx9shader.h" /* The general IR structure is inspired by Mesa GLSL hir, even though the code @@ -70,6 +69,14 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; } +static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) +{ + return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), + hlsl_swizzle_get_component(swizzle, 1), + hlsl_swizzle_get_component(swizzle, 2), + hlsl_swizzle_get_component(swizzle, 3)); +} + enum hlsl_type_class { HLSL_CLASS_SCALAR, @@ -316,6 +323,8 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + + HLSL_IR_COMPILE, HLSL_IR_STATEBLOCK_CONSTANT, }; @@ -591,6 +600,8 @@ struct hlsl_ir_function_decl unsigned int attr_count; const struct hlsl_attribute *const *attrs; + bool early_depth_test; + /* Synthetic boolean variable marking whether a return statement has been * executed. Needed to deal with return statements in non-uniform control * flow, since some backends can't handle them. */ @@ -703,7 +714,7 @@ enum hlsl_ir_expr_op HLSL_OP2_SLT, /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, - * then adds c. */ + * then adds c, where c must have dimx=1. */ HLSL_OP3_DP2ADD, /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ @@ -854,6 +865,35 @@ struct hlsl_ir_string_constant char *string; }; +/* Represents shader compilation call for effects, such as "CompileShader()". + * + * Unlike hlsl_ir_call, it is not flattened, thus, it keeps track of its + * arguments and maintains its own instruction block. */ +struct hlsl_ir_compile +{ + struct hlsl_ir_node node; + + enum hlsl_compile_type + { + /* A shader compilation through the CompileShader() function or the "compile" syntax. */ + HLSL_COMPILE_TYPE_COMPILE, + /* A call to ConstructGSWithSO(), which receives a geometry shader and retrieves one as well. */ + HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, + } compile_type; + + /* Special field to store the profile argument for HLSL_COMPILE_TYPE_COMPILE. */ + const struct hlsl_profile_info *profile; + + /* Block containing the instructions required by the arguments of the + * compilation call. */ + struct hlsl_block instrs; + + /* Arguments to the compilation call. For HLSL_COMPILE_TYPE_COMPILE + * args[0] is an hlsl_ir_call to the specified function. */ + struct hlsl_src *args; + unsigned int args_count; +}; + /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ struct hlsl_ir_stateblock_constant @@ -1016,6 +1056,7 @@ struct hlsl_ctx { uint32_t index; struct hlsl_vec4 value; + struct vkd3d_shader_location loc; } *regs; size_t count, size; } constant_defs; @@ -1029,6 +1070,12 @@ struct hlsl_ctx * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ uint32_t thread_count[3]; + enum vkd3d_tessellator_domain domain; + unsigned int output_control_point_count; + enum vkd3d_shader_tessellator_output_primitive output_primitive; + enum vkd3d_shader_tessellator_partitioning partitioning; + struct hlsl_ir_function_decl *patch_constant_func; + /* In some cases we generate opcodes by parsing an HLSL function and then * invoking it. If not NULL, this field is the name of the function that we * are currently parsing, "mangled" with an internal prefix to avoid @@ -1149,6 +1196,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } +static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); + return CONTAINING_RECORD(node, struct hlsl_ir_compile, node); +} + static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); @@ -1428,6 +1481,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); +struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, + const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, + struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, @@ -1493,6 +1549,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); +bool hlsl_type_is_shader(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); @@ -1528,16 +1585,15 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); +bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, + struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 0c02b27817e..b7c242661e3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -80,7 +80,9 @@ centroid {return KW_CENTROID; } column_major {return KW_COLUMN_MAJOR; } ComputeShader {return KW_COMPUTESHADER; } compile {return KW_COMPILE; } +CompileShader {return KW_COMPILESHADER; } const {return KW_CONST; } +ConstructGSWithSO {return KW_CONSTRUCTGSWITHSO; } continue {return KW_CONTINUE; } DepthStencilState {return KW_DEPTHSTENCILSTATE; } DepthStencilView {return KW_DEPTHSTENCILVIEW; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 3f319dea0d8..67262c2ccfd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -516,7 +516,7 @@ enum loop_type LOOP_DO_WHILE }; -static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) +static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) { unsigned int i, j; @@ -525,11 +525,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att for (j = i + 1; j < attrs->count; ++j) { if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) - return true; + hlsl_error(ctx, &attrs->attrs[j]->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Found duplicate attribute \"%s\".", attrs->attrs[j]->name); } } - - return false; } static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, @@ -610,6 +609,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx { switch (node->type) { + case HLSL_IR_COMPILE: case HLSL_IR_CONSTANT: case HLSL_IR_EXPR: case HLSL_IR_STRING_CONSTANT: @@ -697,9 +697,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; - if (attribute_list_has_duplicates(attributes)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); - + check_attribute_list_for_duplicates(ctx, attributes); check_loop_attributes(ctx, attributes, loc); /* Ignore unroll(0) attribute, and any invalid attribute. */ @@ -1227,7 +1225,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, } static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, + bool is_default_values_initializer); static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, struct parse_parameter *param, const struct vkd3d_shader_location *loc) @@ -1285,7 +1284,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters for (i = 0; i < param->initializer.args_count; ++i) { - initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); + initialize_var_components(ctx, param->initializer.instrs, var, + &store_index, param->initializer.args[i], true); } free_parse_initializer(¶m->initializer); @@ -1673,25 +1673,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl return expr; } -static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +static bool type_is_integer(enum hlsl_base_type type) { - const struct hlsl_type *type = instr->data_type; - struct vkd3d_string_buffer *string; - - switch (type->e.numeric.type) + switch (type) { case HLSL_TYPE_BOOL: case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - break; + return true; - default: - if ((string = hlsl_type_to_string(ctx, type))) - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Expression type '%s' is not integer.", string->buffer); - hlsl_release_string_buffer(ctx, string); - break; + case HLSL_TYPE_DOUBLE: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return false; } + + vkd3d_unreachable(); +} + +static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +{ + const struct hlsl_type *type = instr->data_type; + struct vkd3d_string_buffer *string; + + if (type_is_integer(type->e.numeric.type)) + return; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Expression type '%s' is not integer.", string->buffer); + hlsl_release_string_buffer(ctx, string); } static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -2357,7 +2368,8 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl } static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src, + bool is_default_values_initializer) { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); struct hlsl_deref dst_deref; @@ -2376,23 +2388,43 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - if (dst->default_values) + if (is_default_values_initializer) { struct hlsl_default_value default_value = {0}; unsigned int dst_index; - if (!hlsl_clone_block(ctx, &block, instrs)) - return; - default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); + if (hlsl_is_numeric_type(dst_comp_type)) + { + if (src->type == HLSL_IR_COMPILE) + { + /* Default values are discarded if they contain an object + * literal expression for a numeric component. */ + if (dst->default_values) + { + hlsl_warning(ctx, &src->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE, + "Component %u in variable '%s' initializer is object literal. Default values discarded.", + k, dst->name); + vkd3d_free(dst->default_values); + dst->default_values = NULL; + } + } + else + { + if (!hlsl_clone_block(ctx, &block, instrs)) + return; + default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - if (dst->is_param) - dst_index = *store_index; - else - dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); + if (dst->is_param) + dst_index = *store_index; + else + dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); - dst->default_values[dst_index] = default_value; + if (dst->default_values) + dst->default_values[dst_index] = default_value; - hlsl_block_cleanup(&block); + hlsl_block_cleanup(&block); + } + } } else { @@ -2733,13 +2765,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var if (v->initializer.args_count) { - unsigned int store_index = 0; bool is_default_values_initializer; + unsigned int store_index = 0; unsigned int size, k; is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) || ctx->cur_scope->annotations; + if (hlsl_type_is_shader(type)) + is_default_values_initializer = false; if (is_default_values_initializer) { @@ -2780,7 +2814,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var for (k = 0; k < v->initializer.args_count; ++k) { - initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); + initialize_var_components(ctx, v->initializer.instrs, var, + &store_index, v->initializer.args[k], is_default_values_initializer); } if (is_default_values_initializer) @@ -2835,28 +2870,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var return initializers; } -static bool func_is_compatible_match(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *decl, const struct parse_initializer *args) +static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *decl, + bool is_compile, const struct parse_initializer *args) { - unsigned int i; - - if (decl->parameters.count < args->args_count) - return false; + unsigned int i, k; - for (i = 0; i < args->args_count; ++i) + k = 0; + for (i = 0; i < decl->parameters.count; ++i) { - if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) + if (is_compile && !(decl->parameters.vars[i]->storage_modifiers & HLSL_STORAGE_UNIFORM)) + continue; + + if (k >= args->args_count) + { + if (!decl->parameters.vars[i]->default_values) + return false; + return true; + } + + if (!implicit_compatible_data_types(ctx, args->args[k]->data_type, decl->parameters.vars[i]->data_type)) return false; - } - if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) + ++k; + } + if (k < args->args_count) return false; - return true; } static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - const char *name, const struct parse_initializer *args, + const char *name, const struct parse_initializer *args, bool is_compile, const struct vkd3d_shader_location *loc) { struct hlsl_ir_function_decl *decl, *compatible_match = NULL; @@ -2869,7 +2912,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { - if (func_is_compatible_match(ctx, decl, args)) + if (func_is_compatible_match(ctx, decl, is_compile, args)) { if (compatible_match) { @@ -2890,26 +2933,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); } -static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, - const struct parse_initializer *args, const struct vkd3d_shader_location *loc) +static struct hlsl_ir_node *add_user_call(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, const struct parse_initializer *args, + bool is_compile, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *call; - unsigned int i, j; + unsigned int i, j, k; VKD3D_ASSERT(args->args_count <= func->parameters.count); - for (i = 0; i < args->args_count; ++i) + k = 0; + for (i = 0; i < func->parameters.count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; - struct hlsl_ir_node *arg = args->args[i]; + struct hlsl_ir_node *arg; + + if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM)) + continue; + + if (k >= args->args_count) + break; + arg = args->args[k]; if (!hlsl_types_are_equal(arg->data_type, param->data_type)) { struct hlsl_ir_node *cast; if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) - return false; - args->args[i] = cast; + return NULL; + args->args[k] = cast; arg = cast; } @@ -2918,13 +2970,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu struct hlsl_ir_node *store; if (!(store = hlsl_new_simple_store(ctx, param, arg))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, store); } + + ++k; } /* Add default values for the remaining parameters. */ - for (i = args->args_count; i < func->parameters.count; ++i) + for (; i < func->parameters.count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; unsigned int comp_count = hlsl_type_component_count(param->data_type); @@ -2932,6 +2986,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu VKD3D_ASSERT(param->default_values); + if (is_compile && !(param->storage_modifiers & HLSL_STORAGE_UNIFORM)) + continue; + hlsl_init_simple_deref_from_var(¶m_deref, param); for (j = 0; j < comp_count; ++j) @@ -2945,20 +3002,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu { value.u[0] = param->default_values[j].number; if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, comp); if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) - return false; + return NULL; hlsl_block_add_block(args->instrs, &store_block); } } } if (!(call = hlsl_new_call(ctx, func, loc))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, call); + if (is_compile) + return call; + for (i = 0; i < args->args_count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; @@ -2973,11 +3033,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu "Output argument to \"%s\" is const.", func->func->name); if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) - return false; + return NULL; hlsl_block_add_instr(args->instrs, &load->node); if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) - return false; + return NULL; } } @@ -2998,7 +3058,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu hlsl_block_add_instr(args->instrs, expr); } - return true; + return call; } static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, @@ -3006,7 +3066,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, { struct hlsl_type *type = arg->data_type; - if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) + if (!type_is_integer(type->e.numeric.type)) return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); @@ -3094,14 +3154,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type base_type; struct hlsl_type *type; if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - - base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; - type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); + if (type_is_integer(type->e.numeric.type)) + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return convert_args(ctx, params, type, loc); } @@ -3129,6 +3187,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) { struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *arg; struct hlsl_type *type; char *body; @@ -3152,8 +3211,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos; - type = params->args[0]->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + type = arg->data_type; if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, fn_name, type->name, @@ -3165,7 +3225,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_acos(struct hlsl_ctx *ctx, @@ -3282,9 +3342,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, " : poly_approx;\n" "}"; - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = params->args[0]->data_type; if (!(buf = hlsl_get_string_buffer(ctx))) return false; @@ -3314,7 +3374,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_atan(struct hlsl_ctx *ctx, @@ -3507,7 +3567,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_cosh(struct hlsl_ctx *ctx, @@ -3525,9 +3585,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, struct hlsl_type *cast_type; enum hlsl_base_type base; - if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) - base = HLSL_TYPE_HALF; - else + base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); + if (type_is_integer(base)) base = HLSL_TYPE_FLOAT; cast_type = hlsl_get_vector_type(ctx, base, 3); @@ -3698,15 +3757,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, return false; } + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + dim = min(type->dimx, type->dimy); if (dim == 1) - { - if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) - return false; return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); - } - typename = type->e.numeric.type == HLSL_TYPE_HALF ? "half" : "float"; + typename = hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type)->name; template = templates[dim]; switch (dim) @@ -3734,7 +3792,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_distance(struct hlsl_ctx *ctx, @@ -3766,6 +3824,50 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); } +static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type, *vec4_type; + char *body; + + static const char template[] = + "%s dst(%s i0, %s i1)\n" + "{\n" + /* Scalars and vector-4s are both valid inputs, so promote scalars + * if necessary. */ + " %s src0 = i0, src1 = i1;\n" + " return %s(1, src0.y * src1.y, src0.z, src1.w);\n" + "}"; + + if (!elementwise_intrinsic_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + if (!(type->class == HLSL_CLASS_SCALAR + || (type->class == HLSL_CLASS_VECTOR && type->dimx == 4))) + { + struct vkd3d_string_buffer *string; + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong dimension for dst(): expected scalar or 4-dimensional vector, but got %s.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + vec4_type = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); + + if (!(body = hlsl_sprintf_alloc(ctx, template, + vec4_type->name, type->name, type->name, + vec4_type->name, + vec4_type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "dst", body); + vkd3d_free(body); + if (!func) + return false; + + return !!add_user_call(ctx, func, params, false, loc); +} + static bool intrinsic_exp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3809,9 +3911,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, " return dot(i, ng) < 0 ? n : -n;\n" "}\n"; - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = params->args[0]->data_type; if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name))) @@ -3821,7 +3923,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, @@ -3926,7 +4028,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_ldexp(struct hlsl_ctx *ctx, @@ -4029,7 +4131,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_log(struct hlsl_ctx *ctx, @@ -4081,6 +4183,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); } +static bool intrinsic_mad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + if (!elementwise_intrinsic_convert_args(ctx, params, loc)) + return false; + + args[0] = params->args[0]; + args[1] = params->args[1]; + args[2] = params->args[2]; + return add_expr(ctx, params->instrs, HLSL_OP3_MAD, args, args[0]->data_type, loc); +} + static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4285,13 +4401,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, static bool intrinsic_refract(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_type *r_type = params->args[0]->data_type; - struct hlsl_type *n_type = params->args[1]->data_type; - struct hlsl_type *i_type = params->args[2]->data_type; - struct hlsl_type *res_type, *idx_type, *scal_type; - struct parse_initializer mut_params; + struct hlsl_type *type, *scalar_type; struct hlsl_ir_function_decl *func; - enum hlsl_base_type base; + struct hlsl_ir_node *index; char *body; static const char template[] = @@ -4303,28 +4415,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" "}"; - if (r_type->class == HLSL_CLASS_MATRIX - || n_type->class == HLSL_CLASS_MATRIX - || i_type->class == HLSL_CLASS_MATRIX) + if (params->args[0]->data_type->class == HLSL_CLASS_MATRIX + || params->args[1]->data_type->class == HLSL_CLASS_MATRIX + || params->args[2]->data_type->class == HLSL_CLASS_MATRIX) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); return false; } - VKD3D_ASSERT(params->args_count == 3); - mut_params = *params; - mut_params.args_count = 2; - if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) + /* This is technically not an elementwise intrinsic, but the first two + * arguments are. + * The third argument is a scalar, but can be passed as a vector, + * which should generate an implicit truncation warning. + * Cast down to scalar explicitly, then we can just use + * elementwise_intrinsic_float_convert_args(). + * This may result in casting the scalar back to a vector, + * which we will only use the first component of. */ + + scalar_type = hlsl_get_scalar_type(ctx, params->args[2]->data_type->e.numeric.type); + if (!(index = add_implicit_conversion(ctx, params->instrs, params->args[2], scalar_type, loc))) return false; + params->args[2] = index; - base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); - base = base == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; - res_type = convert_numeric_type(ctx, res_type, base); - idx_type = convert_numeric_type(ctx, i_type, base); - scal_type = hlsl_get_scalar_type(ctx, base); + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; - if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, - res_type->name, idx_type->name, scal_type->name))) + if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, + type->name, type->name, scalar_type->name))) return false; func = hlsl_compile_internal_function(ctx, "refract", body); @@ -4332,7 +4450,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_round(struct hlsl_ctx *ctx, @@ -4415,6 +4533,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } +static bool intrinsic_sincos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "void sincos(%s f, out %s s, out %s c)\n" + "{\n" + " s = sin(f);\n" + " c = cos(f);\n" + "}"; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "sincos", body); + vkd3d_free(body); + if (!func) + return false; + + return !!add_user_call(ctx, func, params, false, loc); +} + static bool intrinsic_sinh(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4436,9 +4583,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, " return (p * p) * (3 - 2 * p);\n" "}"; - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = params->args[0]->data_type; if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) return false; @@ -4447,7 +4594,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_sqrt(struct hlsl_ctx *ctx, @@ -4469,13 +4616,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; + type = params->args[0]->data_type; if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, params->args[1], params->args[0], loc))) return false; - type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } @@ -4523,7 +4669,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, if (!func) return false; - return add_user_call(ctx, func, params, loc); + return !!add_user_call(ctx, func, params, false, loc); } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, @@ -4661,17 +4807,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) return false; - initialize_var_components(ctx, params->instrs, var, &idx, coords); + initialize_var_components(ctx, params->instrs, var, &idx, coords, false); if (hlsl_version_ge(ctx, 4, 0)) { if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) return false; hlsl_block_add_instr(params->instrs, half); - initialize_var_components(ctx, params->instrs, var, &idx, half); + initialize_var_components(ctx, params->instrs, var, &idx, half, false); } else - initialize_var_components(ctx, params->instrs, var, &idx, coords); + initialize_var_components(ctx, params->instrs, var, &idx, coords, false); if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; @@ -4937,6 +5083,7 @@ intrinsic_functions[] = {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, + {"dst", 2, true, intrinsic_dst}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"f16tof32", 1, true, intrinsic_f16tof32}, @@ -4952,6 +5099,7 @@ intrinsic_functions[] = {"log", 1, true, intrinsic_log}, {"log10", 1, true, intrinsic_log10}, {"log2", 1, true, intrinsic_log2}, + {"mad", 3, true, intrinsic_mad}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, {"mul", 2, true, intrinsic_mul}, @@ -4966,6 +5114,7 @@ intrinsic_functions[] = {"saturate", 1, true, intrinsic_saturate}, {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, + {"sincos", 3, true, intrinsic_sincos}, {"sinh", 1, true, intrinsic_sinh}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, @@ -5002,9 +5151,9 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, struct intrinsic_function *intrinsic; struct hlsl_ir_function_decl *decl; - if ((decl = find_function_call(ctx, name, args, loc))) + if ((decl = find_function_call(ctx, name, args, false, loc))) { - if (!add_user_call(ctx, decl, args, loc)) + if (!add_user_call(ctx, decl, args, false, loc)) goto fail; } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), @@ -5060,6 +5209,84 @@ fail: return NULL; } +static struct hlsl_block *add_shader_compilation(struct hlsl_ctx *ctx, const char *profile_name, + const char *function_name, struct parse_initializer *args, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *compile, *call_to_compile = NULL; + struct hlsl_ir_function_decl *decl; + + if (!ctx->in_state_block && ctx->cur_scope != ctx->globals) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE, + "Shader compilation statements must be in global scope or a state block."); + free_parse_initializer(args); + return NULL; + } + + if (!(decl = find_function_call(ctx, function_name, args, true, loc))) + { + if (rb_get(&ctx->functions, function_name)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "No compatible \"%s\" declaration with %u uniform parameters found.", + function_name, args->args_count); + } + else + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Function \"%s\" is not defined.", function_name); + } + free_parse_initializer(args); + return NULL; + } + + if (!(call_to_compile = add_user_call(ctx, decl, args, true, loc))) + { + free_parse_initializer(args); + return NULL; + } + + if (!(compile = hlsl_new_compile(ctx, HLSL_COMPILE_TYPE_COMPILE, + profile_name, &call_to_compile, 1, args->instrs, loc))) + { + free_parse_initializer(args); + return NULL; + } + + free_parse_initializer(args); + return make_block(ctx, compile); +} + +static struct hlsl_block *add_compile_variant(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, + struct parse_initializer *args, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *compile; + + switch (compile_type) + { + case HLSL_COMPILE_TYPE_COMPILE: + vkd3d_unreachable(); + + case HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO: + if (args->args_count != 2 && args->args_count != 6) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to ConstructGSWithSO: expected 2 or 6, but got %u.", + args->args_count); + } + break; + } + + if (!(compile = hlsl_new_compile(ctx, compile_type, NULL, args->args, args->args_count, args->instrs, loc))) + { + free_parse_initializer(args); + return NULL; + } + + free_parse_initializer(args); + return make_block(ctx, compile); +} + static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -5071,7 +5298,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type return NULL; for (i = 0; i < params->args_count; ++i) - initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); + initialize_var_components(ctx, params->instrs, var, &idx, params->args[i], false); if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; @@ -6058,8 +6285,10 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE +%token KW_COMPILESHADER %token KW_COMPUTESHADER %token KW_CONST +%token KW_CONSTRUCTGSWITHSO %token KW_CONTINUE %token KW_DEFAULT %token KW_DEPTHSTENCILSTATE @@ -6827,6 +7056,8 @@ func_prototype: func_prototype_no_attrs | attribute_list func_prototype_no_attrs { + check_attribute_list_for_duplicates(ctx, &$1); + if ($2.first) { $2.decl->attr_count = $1.count; @@ -7619,6 +7850,11 @@ stateblock_lhs_identifier: if (!($$ = hlsl_strdup(ctx, "vertexshader"))) YYABORT; } + | KW_GEOMETRYSHADER + { + if (!($$ = hlsl_strdup(ctx, "geometryshader"))) + YYABORT; + } state_block_index_opt: %empty @@ -8092,8 +8328,7 @@ selection_statement: struct hlsl_ir_node *instr; unsigned int i; - if (attribute_list_has_duplicates(attributes)) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); + check_attribute_list_for_duplicates(ctx, attributes); for (i = 0; i < attributes->count; ++i) { @@ -8391,6 +8626,34 @@ primary_expr: { $$ = $2; } + + | KW_COMPILE any_identifier var_identifier '(' func_arguments ')' + { + if (!($$ = add_shader_compilation(ctx, $2, $3, &$5, &@1))) + { + vkd3d_free($2); + vkd3d_free($3); + YYABORT; + } + vkd3d_free($2); + vkd3d_free($3); + } + | KW_COMPILESHADER '(' any_identifier ',' var_identifier '(' func_arguments ')' ')' + { + if (!($$ = add_shader_compilation(ctx, $3, $5, &$7, &@1))) + { + vkd3d_free($3); + vkd3d_free($5); + YYABORT; + } + vkd3d_free($3); + vkd3d_free($5); + } + | KW_CONSTRUCTGSWITHSO '(' func_arguments ')' + { + if (!($$ = add_compile_variant(ctx, HLSL_COMPILE_TYPE_CONSTRUCTGSWITHSO, &$3, &@1))) + YYABORT; + } | var_identifier '(' func_arguments ')' { if (!($$ = add_call(ctx, $1, &$3, &@1))) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 92b5c71c43f..6cae0e3b5c9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -4050,6 +4050,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) switch (instr->type) { case HLSL_IR_CONSTANT: + case HLSL_IR_COMPILE: case HLSL_IR_EXPR: case HLSL_IR_INDEX: case HLSL_IR_LOAD: @@ -4337,6 +4338,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CONSTANT: case HLSL_IR_STRING_CONSTANT: break; + case HLSL_IR_COMPILE: + /* Compile calls are skipped as they are only relevent to effects. */ + break; } } } @@ -4816,7 +4820,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } } -static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) +static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f, + const struct vkd3d_shader_location *loc) { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_constant_register *reg; @@ -4838,6 +4843,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, memset(reg, 0, sizeof(*reg)); reg->index = component_index / 4; reg->value.f[component_index % 4] = f; + reg->loc = *loc; } static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, @@ -4898,7 +4904,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, vkd3d_unreachable(); } - record_constant(ctx, constant->reg.id * 4 + x, f); + record_constant(ctx, constant->reg.id * 4 + x, f, &constant->node.loc); } break; @@ -4991,17 +4997,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); - record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f, &instr->loc); ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); - record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f, &instr->loc); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f, &instr->loc); return; } @@ -5128,7 +5134,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if (ctx->profile->major_version < 4) { struct vkd3d_shader_version version; - D3DDECLUSAGE usage; + enum vkd3d_decl_usage usage; uint32_t usage_idx; /* ps_1_* outputs are special and go in temp register 0. */ @@ -5152,10 +5158,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } else { - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; bool has_idx; - if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage)) + if (!sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -5786,6 +5792,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere return ret; } +static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i) +{ + const struct hlsl_ir_node *instr = attr->args[i].node; + const struct hlsl_type *type = instr->data_type; + + if (type->class != HLSL_CLASS_STRING) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for the argument %u of [%s]: expected string, but got %s.", + i, attr->name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } + + return hlsl_ir_string_constant(instr)->string; +} + static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) { unsigned int i; @@ -5834,6 +5860,263 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } } +static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *value; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [domain] attribute, but got %u.", attr->args_count); + return; + } + + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; + + if (!strcmp(value, "isoline")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_LINE; + else if (!strcmp(value, "tri")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + else if (!strcmp(value, "quad")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + else + hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN, + "Invalid tessellator domain \"%s\": expected \"isoline\", \"tri\", or \"quad\".", + value); +} + +static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const struct hlsl_ir_node *instr; + const struct hlsl_type *type; + const struct hlsl_ir_constant *constant; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [outputcontrolpoints] attribute, but got %u.", attr->args_count); + return; + } + + instr = attr->args[0].node; + type = instr->data_type; + + if (type->class != HLSL_CLASS_SCALAR + || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + return; + } + + if (instr->type != HLSL_IR_CONSTANT) + { + hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer."); + return; + } + constant = hlsl_ir_constant(instr); + + if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0) + || constant->value.u[0].u > 32) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT, + "Output control point count must be between 0 and 32."); + + ctx->output_control_point_count = constant->value.u[0].u; +} + +static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *value; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [outputtopology] attribute, but got %u.", attr->args_count); + return; + } + + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; + + if (!strcmp(value, "point")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT; + else if (!strcmp(value, "line")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE; + else if (!strcmp(value, "triangle_cw")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW; + else if (!strcmp(value, "triangle_ccw")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else + hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Invalid tessellator output topology \"%s\": " + "expected \"point\", \"line\", \"triangle_cw\", or \"triangle_ccw\".", value); +} + +static void parse_partitioning_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *value; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [partitioning] attribute, but got %u.", attr->args_count); + return; + } + + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; + + if (!strcmp(value, "integer")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER; + else if (!strcmp(value, "pow2")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2; + else if (!strcmp(value, "fractional_even")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; + else if (!strcmp(value, "fractional_odd")) + ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; + else + hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING, + "Invalid tessellator partitioning \"%s\": " + "expected \"integer\", \"pow2\", \"fractional_even\", or \"fractional_odd\".", value); +} + +static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const char *name; + struct hlsl_ir_function *func; + struct hlsl_ir_function_decl *decl; + + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [patchconstantfunc] attribute, but got %u.", attr->args_count); + return; + } + + if (!(name = get_string_argument_value(ctx, attr, 0))) + return; + + ctx->patch_constant_func = NULL; + if ((func = hlsl_get_function(ctx, name))) + { + /* Pick the last overload with a body. */ + LIST_FOR_EACH_ENTRY_REV(decl, &func->overloads, struct hlsl_ir_function_decl, entry) + { + if (decl->has_body) + { + ctx->patch_constant_func = decl; + break; + } + } + } + + if (!ctx->patch_constant_func) + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Patch constant function \"%s\" is not defined.", name); +} + +static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_profile_info *profile = ctx->profile; + unsigned int i; + + for (i = 0; i < entry_func->attr_count; ++i) + { + const struct hlsl_attribute *attr = entry_func->attrs[i]; + + if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE) + parse_numthreads_attribute(ctx, attr); + else if (!strcmp(attr->name, "domain") + && (profile->type == VKD3D_SHADER_TYPE_HULL || profile->type == VKD3D_SHADER_TYPE_DOMAIN)) + parse_domain_attribute(ctx, attr); + else if (!strcmp(attr->name, "outputcontrolpoints") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_outputcontrolpoints_attribute(ctx, attr); + else if (!strcmp(attr->name, "outputtopology") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_outputtopology_attribute(ctx, attr); + else if (!strcmp(attr->name, "partitioning") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_partitioning_attribute(ctx, attr); + else if (!strcmp(attr->name, "patchconstantfunc") && profile->type == VKD3D_SHADER_TYPE_HULL) + parse_patchconstantfunc_attribute(ctx, attr); + else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL) + entry_func->early_depth_test = true; + else + hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, + "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); + } +} + +static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func) +{ + if (ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); + } + + if (ctx->output_control_point_count == UINT_MAX) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [outputcontrolpoints] attribute.", entry_func->func->name); + } + + if (!ctx->output_primitive) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [outputtopology] attribute.", entry_func->func->name); + } + + if (!ctx->partitioning) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [partitioning] attribute.", entry_func->func->name); + } + + if (!ctx->patch_constant_func) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [patchconstantfunc] attribute.", entry_func->func->name); + } + else if (ctx->patch_constant_func == entry_func) + { + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL, + "Patch constant function cannot be the entry point function."); + /* Native returns E_NOTIMPL instead of E_FAIL here. */ + ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; + return; + } + + switch (ctx->domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW + || ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Triangle output topologies are not available for isoline domains."); + break; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Line output topologies are not available for triangle domains."); + break; + + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Line output topologies are not available for quad domains."); + break; + + default: + break; + } +} + static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; @@ -5956,8 +6239,8 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, if (!hlsl_sm1_register_from_semantic(&program->shader_version, var->semantic.name, var->semantic.index, output, &type, ®ister_index)) { + enum vkd3d_decl_usage usage; unsigned int usage_idx; - D3DDECLUSAGE usage; bool ret; register_index = var->regs[HLSL_REGSET_NUMERIC].id; @@ -5969,7 +6252,7 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, * SV_Position; the closer equivalent is VPOS, which is not declared * as a semantic. */ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX - && output && usage == D3DDECLUSAGE_POSITION) + && output && usage == VKD3D_DECL_USAGE_POSITION) sysval = VKD3D_SHADER_SV_POSITION; } mask = (1 << var->data_type->dimx) - 1; @@ -6006,104 +6289,1018 @@ static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_progra } } -/* OBJECTIVE: Translate all the information from ctx and entry_func to the - * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() - * without relying on ctx and entry_func. */ -static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) +static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) { - struct vkd3d_shader_version version = {0}; - struct vkd3d_bytecode_buffer buffer = {0}; - - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - if (!vsir_program_init(program, NULL, &version, 0)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } + uint32_t swizzle; - write_sm1_uniforms(ctx, &buffer); - if (buffer.status) - { - vkd3d_free(buffer.data); - ctx->result = buffer.status; - return; - } - ctab->code = buffer.data; - ctab->size = buffer.size; - - sm1_generate_vsir_signature(ctx, program); + swizzle = hlsl_swizzle_from_writemask(src_writemask); + swizzle = hlsl_map_swizzle(swizzle, dst_writemask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + return swizzle; } -static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, - struct hlsl_block **found_block) +static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_block *block) { - struct hlsl_ir_node *node; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i, x; - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + for (i = 0; i < ctx->constant_defs.count; ++i) { - if (node == stop_point) - return NULL; + const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; - if (node->type == HLSL_IR_IF) + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { - struct hlsl_ir_if *iff = hlsl_ir_if(node); - struct hlsl_ir_jump *jump = NULL; - - if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) - return jump; - if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) - return jump; + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; } - else if (node->type == HLSL_IR_JUMP) + + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VKD3DSIH_DEF, 1, 1)) { - struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + ++instructions->count; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = constant_reg->index; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + src_param->reg.type = VKD3DSPR_IMMCONST; + src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + src_param->reg.non_uniform = false; + src_param->reg.data_type = VKD3D_DATA_FLOAT; + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + for (x = 0; x < 4; ++x) + src_param->reg.u.immconst_f32[x] = constant_reg->value.f[x]; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } +} + +static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_block *block) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + enum vkd3d_shader_resource_type resource_type; + struct vkd3d_shader_register_range *range; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_semantic *semantic; + struct vkd3d_shader_instruction *ins; + enum hlsl_sampler_dim sampler_dim; + struct hlsl_ir_var *var; + unsigned int i, count; - if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->bind_count[HLSL_REGSET_SAMPLERS]; + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) { - *found_block = block; - return jump; + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_2D: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + break; + + case HLSL_SAMPLER_DIM_CUBE: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; + break; + + case HLSL_SAMPLER_DIM_3D: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_3D; + break; + + case HLSL_SAMPLER_DIM_GENERIC: + /* These can appear in sm4-style combined sample instructions. */ + hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); + continue; + + default: + vkd3d_unreachable(); + break; + } + + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, &var->loc, VKD3DSIH_DCL, 0, 0)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + ++instructions->count; + + semantic = &ins->declaration.semantic; + semantic->resource_type = resource_type; + + dst_param = &semantic->resource.reg; + vsir_register_init(&dst_param->reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); + dst_param->reg.dimension = VSIR_DIMENSION_NONE; + dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i; + dst_param->write_mask = 0; + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = dst_param->reg.idx[0].offset; } } } +} - return NULL; +static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( + struct hlsl_ctx *ctx, struct vsir_program *program, + const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode, + unsigned int dst_count, unsigned int src_count) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *ins; + + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, loc, opcode, dst_count, src_count)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ++instructions->count; + return ins; } -static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) +static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) { - /* Always use the explicit limit if it has been passed. */ - if (loop->unroll_limit) - return loop->unroll_limit; + struct hlsl_ir_node *instr = &constant->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; - /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) - return 1024; + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(constant->reg.allocated); - /* SM4 limits implicit unrolling to 254 iterations. */ - if (hlsl_version_ge(ctx, 4, 0)) - return 254; + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; - /* SM<3 implicitly unrolls up to 1024 iterations. */ - return 1024; + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = constant->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; } -static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ +static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, + bool map_src_swizzles) { - unsigned int max_iterations, i; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i, src_count = 0; - max_iterations = loop_unrolling_get_max_iterations(ctx, loop); + VKD3D_ASSERT(instr->reg.allocated); - for (i = 0; i < max_iterations; ++i) + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) { - struct hlsl_block tmp_dst, *jump_block; - struct hlsl_ir_jump *jump = NULL; + if (expr->operands[i].node) + src_count = i + 1; + } + VKD3D_ASSERT(!src_mod || src_count == 1); - if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) - return false; + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + dst_param->modifiers = dst_mod; + + for (i = 0; i < src_count; ++i) + { + struct hlsl_ir_node *operand = expr->operands[i].node; + + src_param = &ins->src[i]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, + map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = src_mod; + } +} + +/* Translate ops that have 1 src and need one instruction for each component in + * the d3dbc backend. */ +static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode) +{ + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t src_swizzle; + unsigned int i, c; + + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(operand); + + src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); + for (i = 0; i < 4; ++i) + { + if (instr->reg.writemask & (1u << i)) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = 1u << i; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + c = vsir_swizzle_get_component(src_swizzle, i); + src_param->swizzle = vsir_swizzle_from_writemask(1u << c); + } + } +} + +static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_expr *expr) +{ + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int src_count = 0; + + VKD3D_ASSERT(instr->reg.allocated); + src_count = (ctx->profile->major_version < 3) ? 3 : 1; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + if (ctx->profile->major_version < 3) + { + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } +} + +static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr) +{ + const struct hlsl_type *src_type, *dst_type; + const struct hlsl_ir_node *arg1, *instr; + + arg1 = expr->operands[0].node; + src_type = arg1->data_type; + instr = &expr->node; + dst_type = instr->data_type; + + /* Narrowing casts were already lowered. */ + VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + switch (src_type->e.numeric.type) + { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + switch(src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not + * reach this case unless we are missing something. */ + hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast to double."); + break; + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. */ + default: + hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", + debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); + break; + } + + return false; +} + +static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_expr *expr) +{ + struct hlsl_ir_node *instr = &expr->node; + + if (expr->op != HLSL_OP1_REINTERPRET && expr->op != HLSL_OP1_CAST + && instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return false; + } + + switch (expr->op) + { + case HLSL_OP1_ABS: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); + break; + + case HLSL_OP1_CAST: + return sm1_generate_vsir_instr_expr_cast(ctx, program, expr); + + case HLSL_OP1_COS_REDUCED: + VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0); + sm1_generate_vsir_instr_expr_sincos(ctx, program, expr); + break; + + case HLSL_OP1_DSX: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); + break; + + case HLSL_OP1_DSY: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); + break; + + case HLSL_OP1_EXP2: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_EXP); + break; + + case HLSL_OP1_LOG2: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_LOG); + break; + + case HLSL_OP1_NEG: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); + break; + + case HLSL_OP1_RCP: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP); + break; + + case HLSL_OP1_REINTERPRET: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + break; + + case HLSL_OP1_RSQ: + sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RSQ); + break; + + case HLSL_OP1_SAT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); + break; + + case HLSL_OP1_SIN_REDUCED: + VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_1); + sm1_generate_vsir_instr_expr_sincos(ctx, program, expr); + break; + + case HLSL_OP2_ADD: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + break; + + case HLSL_OP2_DOT: + switch (expr->operands[0].node->data_type->dimx) + { + case 3: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + break; + + case 4: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + break; + + default: + vkd3d_unreachable(); + return false; + } + break; + + case HLSL_OP2_MAX: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_MIN: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_MUL: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); + break; + + case HLSL_OP1_FRACT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_AND: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_OR: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_SLT: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); + break; + + case HLSL_OP3_CMP: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); + break; + + case HLSL_OP3_DP2ADD: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); + break; + + case HLSL_OP3_MAD: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + return false; + } + + return true; +} + +static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, + struct vkd3d_shader_dst_param *dst_param, struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc, unsigned int writemask) +{ + enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; + struct vkd3d_shader_version version; + uint32_t register_index; + struct hlsl_reg reg; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = hlsl_combine_writemasks(reg.writemask, writemask); + + if (deref->var->is_output_semantic) + { + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (version.type == VKD3D_SHADER_TYPE_PIXEL && version.major == 1) + { + type = VKD3DSPR_TEMP; + register_index = 0; + } + else if (!hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, + deref->var->semantic.index, true, &type, ®ister_index)) + { + VKD3D_ASSERT(reg.allocated); + type = VKD3DSPR_OUTPUT; + register_index = reg.id; + } + else + writemask = (1u << deref->var->data_type->dimx) - 1; + } + else + VKD3D_ASSERT(reg.allocated); + + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); + dst_param->write_mask = writemask; + dst_param->reg.idx[0].offset = register_index; + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); +} + +static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, + unsigned int dst_writemask, const struct vkd3d_shader_location *loc) +{ + enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; + struct vkd3d_shader_version version; + uint32_t register_index; + unsigned int writemask; + struct hlsl_reg reg; + + if (hlsl_type_is_resource(deref->var->data_type)) + { + unsigned int sampler_offset; + + type = VKD3DSPR_COMBINED_SAMPLER; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, deref); + register_index = deref->var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; + writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (deref->var->is_uniform) + { + type = VKD3DSPR_CONST; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = reg.writemask; + VKD3D_ASSERT(reg.allocated); + } + else if (deref->var->is_input_semantic) + { + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + if (hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, + deref->var->semantic.index, false, &type, ®ister_index)) + { + writemask = (1 << deref->var->data_type->dimx) - 1; + } + else + { + type = VKD3DSPR_INPUT; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = reg.writemask; + VKD3D_ASSERT(reg.allocated); + } + } + else + { + type = VKD3DSPR_TEMP; + + reg = hlsl_reg_from_deref(ctx, deref); + register_index = reg.id; + writemask = reg.writemask; + } + + vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = register_index; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask); + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); +} + +static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_load *load) +{ + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask, + &ins->location); +} + +static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_resource_load *load) +{ + struct hlsl_ir_node *coords = load->coords.node; + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + unsigned int src_count = 2; + uint32_t flags = 0; + + VKD3D_ASSERT(instr->reg.allocated); + + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + opcode = VKD3DSIH_TEX; + break; + + case HLSL_RESOURCE_SAMPLE_PROJ: + opcode = VKD3DSIH_TEX; + flags |= VKD3DSI_TEXLD_PROJECT; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + opcode = VKD3DSIH_TEX; + flags |= VKD3DSI_TEXLD_BIAS; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + opcode = VKD3DSIH_TEXLDD; + src_count += 2; + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); + return; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return; + ins->flags = flags; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = coords->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, + VKD3DSP_WRITEMASK_ALL, &ins->location); + + if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + src_param = &ins->src[2]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ddx->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + src_param = &ins->src[3]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ddy->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); + } +} + +static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_swizzle *swizzle_instr) +{ + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; + + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->write_mask = instr->reg.writemask; + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); + swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); + swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = val->reg.id; + src_param->swizzle = swizzle; +} + +static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_store *store) +{ + struct hlsl_ir_node *rhs = store->rhs.node; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_src_param *src_param; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = rhs->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask); +} + +static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_jump *jump) +{ + struct hlsl_ir_node *condition = jump->condition.node; + struct hlsl_ir_node *instr = &jump->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = condition->reg.id; + dst_param->write_mask = condition->reg.writemask; + } + else + { + hlsl_fixme(ctx, &instr->loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + } +} + +static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); + +static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) +{ + struct hlsl_ir_node *condition = iff->condition.node; + struct vkd3d_shader_src_param *src_param; + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; + + if (hlsl_version_lt(ctx, 2, 1)) + { + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + return; + } + VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2))) + return; + ins->flags = VKD3D_SHADER_REL_OP_NE; + + swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = condition->reg.id; + src_param->swizzle = swizzle; + src_param->modifiers = 0; + + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = condition->reg.id; + src_param->swizzle = swizzle; + src_param->modifiers = VKD3DSPSM_NEG; + + sm1_generate_vsir_block(ctx, &iff->then_block, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) + return; + + sm1_generate_vsir_block(ctx, &iff->else_block, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) + return; +} + +static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + sm1_generate_vsir_instr_constant(ctx, program, hlsl_ir_constant(instr)); + break; + + case HLSL_IR_EXPR: + sm1_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + sm1_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: + sm1_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: + sm1_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + sm1_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_STORE: + sm1_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + sm1_generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + break; + } + } +} + +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() + * without relying on ctx and entry_func. */ +static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) +{ + struct vkd3d_shader_version version = {0}; + struct vkd3d_bytecode_buffer buffer = {0}; + struct hlsl_block block; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + write_sm1_uniforms(ctx, &buffer); + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + ctab->code = buffer.data; + ctab->size = buffer.size; + + sm1_generate_vsir_signature(ctx, program); + + hlsl_block_init(&block); + sm1_generate_vsir_constant_defs(ctx, program, &block); + sm1_generate_vsir_sampler_dcls(ctx, program, &block); + list_move_head(&entry_func->body.instrs, &block.instrs); + + sm1_generate_vsir_block(ctx, &entry_func->body, program); +} + +static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, + struct hlsl_block **found_block) +{ + struct hlsl_ir_node *node; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + if (node == stop_point) + return NULL; + + if (node->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(node); + struct hlsl_ir_jump *jump = NULL; + + if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) + return jump; + if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) + return jump; + } + else if (node->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) + { + *found_block = block; + return jump; + } + } + } + + return NULL; +} + +static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) +{ + /* Always use the explicit limit if it has been passed. */ + if (loop->unroll_limit) + return loop->unroll_limit; + + /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + return 1024; + + /* SM4 limits implicit unrolling to 254 iterations. */ + if (hlsl_version_ge(ctx, 4, 0)) + return 254; + + /* SM<3 implicitly unrolls up to 1024 iterations. */ + return 1024; +} + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +{ + unsigned int max_iterations, i; + + max_iterations = loop_unrolling_get_max_iterations(ctx, loop); + + for (i = 0; i < max_iterations; ++i) + { + struct hlsl_block tmp_dst, *jump_block; + struct hlsl_ir_jump *jump = NULL; + + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) + return false; list_move_before(&loop->node.entry, &tmp_dst.instrs); hlsl_block_cleanup(&tmp_dst); @@ -6406,18 +7603,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry append_output_var_copy(ctx, body, entry_func->return_var); } - for (i = 0; i < entry_func->attr_count; ++i) - { - const struct hlsl_attribute *attr = entry_func->attrs[i]; - - if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE) - parse_numthreads_attribute(ctx, attr); - else - hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, - "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); - } + parse_entry_function_attributes(ctx, entry_func); + if (ctx->result) + return ctx->result; - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) + if (profile->type == VKD3D_SHADER_TYPE_HULL) + validate_hull_shader_attributes(ctx, entry_func); + else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); @@ -6540,7 +7732,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry return ctx->result; } - result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); + result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context); vsir_program_cleanup(&program); vkd3d_shader_free_shader_code(&ctab); return result; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index db4913b7c62..716adb15f08 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -1452,11 +1452,15 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) case HLSL_TYPE_UINT: case HLSL_TYPE_INT: - case HLSL_TYPE_BOOL: if (const_arg->value.u[k].u != 1) return false; break; + case HLSL_TYPE_BOOL: + if (const_arg->value.u[k].u != ~0) + return false; + break; + default: return false; } @@ -1514,6 +1518,20 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in res_node = mut_arg; break; + case HLSL_OP2_LOGIC_AND: + if (constant_is_zero(const_arg)) + res_node = &const_arg->node; + else if (constant_is_one(const_arg)) + res_node = mut_arg; + break; + + case HLSL_OP2_LOGIC_OR: + if (constant_is_zero(const_arg)) + res_node = mut_arg; + else if (constant_is_one(const_arg)) + res_node = &const_arg->node; + break; + default: break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 747238e2fee..db9992d9715 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -19,6 +19,15 @@ #include "vkd3d_shader_private.h" #include "vkd3d_types.h" +struct vsir_transformation_context +{ + enum vkd3d_result result; + struct vsir_program *program; + uint64_t config_flags; + const struct vkd3d_shader_compile_info *compile_info; + struct vkd3d_shader_message_context *message_context; +}; + static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) { @@ -65,7 +74,7 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil } bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve) + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type) { memset(program, 0, sizeof(*program)); @@ -87,6 +96,7 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c } program->shader_version = *version; + program->cf_type = cf_type; return shader_instruction_array_init(&program->instructions, reserve); } @@ -117,26 +127,137 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( return NULL; } +void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + reg->type = reg_type; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->data_type = data_type; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[0].is_in_bounds = false; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[1].is_in_bounds = false; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx[2].is_in_bounds = false; + reg->idx_count = idx_count; + reg->dimension = VSIR_DIMENSION_SCALAR; + reg->alignment = 0; +} + static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; } -static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) +void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) { - enum vkd3d_shader_opcode opcode = instruction->opcode; - return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) - || opcode == VKD3DSIH_HS_DECLS; + vsir_register_init(¶m->reg, reg_type, data_type, idx_count); + param->swizzle = 0; + param->modifiers = VKD3DSPSM_NONE; } -static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) { - struct vkd3d_shader_location location = ins->location; + vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); + src->reg.u.immconst_u32[0] = value; +} - vsir_instruction_init(ins, &location, VKD3DSIH_NOP); +void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) +{ + vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); + param->reg.dimension = VSIR_DIMENSION_NONE; + param->reg.idx[0].offset = label_id; +} + +static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) +{ + vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); + src->reg.idx[0].offset = idx; +} + +static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_RESOURCE, 2); + src->reg.idx[0].offset = id; + src->reg.idx[1].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; +} + +static void vsir_src_param_init_sampler(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_SAMPLER, VKD3D_DATA_SAMPLER, 2); + src->reg.idx[0].offset = id; + src->reg.idx[1].offset = idx; + src->reg.dimension = VSIR_DIMENSION_NONE; +} + +static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src->reg.idx[0].offset = idx; +} + +static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + src->reg.idx[0].offset = idx; +} + +void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + vsir_register_init(¶m->reg, reg_type, data_type, idx_count); + param->write_mask = VKD3DSP_WRITEMASK_0; + param->modifiers = VKD3DSPDM_NONE; + param->shift = 0; +} + +static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + +static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + +static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + dst->reg.idx[0].offset = idx; + dst->write_mask = VKD3DSP_WRITEMASK_0; } -static bool vsir_instruction_init_with_params(struct vsir_program *program, +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode) +{ + memset(ins, 0, sizeof(*ins)); + ins->location = *location; + ins->opcode = opcode; +} + +bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) { @@ -161,6 +282,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } +static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, + const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program) +{ + struct vkd3d_shader_src_param *src_param; + + if (!(src_param = vsir_program_get_src_params(program, 1))) + return false; + + vsir_src_param_init_label(src_param, label_id); + + vsir_instruction_init(ins, location, VKD3DSIH_LABEL); + ins->src = src_param; + ins->src_count = 1; + + return true; +} + +static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) +{ + enum vkd3d_shader_opcode opcode = instruction->opcode; + return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) + || opcode == VKD3DSIH_HS_DECLS; +} + +static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_location location = ins->location; + + vsir_instruction_init(ins, &location, VKD3DSIH_NOP); +} + static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, enum vkd3d_shader_opcode *opcode, bool *requires_swap) { @@ -441,10 +593,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog return VKD3D_OK; } +static enum vkd3d_result vsir_program_lower_tex(struct vsir_program *program, struct vkd3d_shader_instruction *tex) +{ + unsigned int idx = tex->src[1].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs; + + VKD3D_ASSERT(tex->src[1].reg.idx_count == 1); + VKD3D_ASSERT(!tex->src[1].reg.idx[0].rel_addr); + + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + srcs[0] = tex->src[0]; + vsir_src_param_init_resource(&srcs[1], idx, idx); + vsir_src_param_init_sampler(&srcs[2], idx, idx); + + tex->opcode = VKD3DSIH_SAMPLE; + tex->src = srcs; + tex->src_count = 3; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, + struct vkd3d_shader_instruction *texldd) +{ + unsigned int idx = texldd->src[1].reg.idx[0].offset; + struct vkd3d_shader_src_param *srcs; + + VKD3D_ASSERT(texldd->src[1].reg.idx_count == 1); + VKD3D_ASSERT(!texldd->src[1].reg.idx[0].rel_addr); + + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + srcs[0] = texldd->src[0]; + vsir_src_param_init_resource(&srcs[1], idx, idx); + vsir_src_param_init_sampler(&srcs[2], idx, idx); + srcs[3] = texldd->src[2]; + srcs[4] = texldd->src[3]; + + texldd->opcode = VKD3DSIH_SAMPLE_GRAD; + texldd->src = srcs; + texldd->src_count = 5; + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_message_context *message_context = ctx->message_context; unsigned int tmp_idx = ~0u, i; enum vkd3d_result ret; @@ -481,6 +681,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return ret; break; + case VKD3DSIH_TEX: + if ((ret = vsir_program_lower_tex(program, ins)) < 0) + return ret; + break; + + case VKD3DSIH_TEXLDD: + if ((ret = vsir_program_lower_texldd(program, ins)) < 0) + return ret; + break; + + case VKD3DSIH_TEXBEM: + case VKD3DSIH_TEXBEML: + case VKD3DSIH_TEXCOORD: + case VKD3DSIH_TEXDEPTH: + case VKD3DSIH_TEXDP3: + case VKD3DSIH_TEXDP3TEX: + case VKD3DSIH_TEXLDL: + case VKD3DSIH_TEXM3x2PAD: + case VKD3DSIH_TEXM3x2TEX: + case VKD3DSIH_TEXM3x3DIFF: + case VKD3DSIH_TEXM3x3PAD: + case VKD3DSIH_TEXM3x3SPEC: + case VKD3DSIH_TEXM3x3TEX: + case VKD3DSIH_TEXM3x3VSPEC: + case VKD3DSIH_TEXREG2AR: + case VKD3DSIH_TEXREG2GB: + case VKD3DSIH_TEXREG2RGB: + vkd3d_shader_error(ctx->message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to unimplemented feature: Combined sampler instruction %#x.", + ins->opcode); + return VKD3D_ERROR_NOT_IMPLEMENTED; + default: break; } @@ -541,9 +773,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( } static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { - const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; + struct vkd3d_shader_message_context *message_context = ctx->message_context; + const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; struct shader_signature *signature = &program->output_signature; const struct vkd3d_shader_varying_map_info *varying_map; unsigned int i; @@ -727,174 +961,50 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali return VKD3D_OK; } -void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) +static enum vkd3d_result vsir_program_flatten_hull_shader_phases(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - reg->type = reg_type; - reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - reg->non_uniform = false; - reg->data_type = data_type; - reg->idx[0].offset = ~0u; - reg->idx[0].rel_addr = NULL; - reg->idx[0].is_in_bounds = false; - reg->idx[1].offset = ~0u; - reg->idx[1].rel_addr = NULL; - reg->idx[1].is_in_bounds = false; - reg->idx[2].offset = ~0u; - reg->idx[2].rel_addr = NULL; - reg->idx[2].is_in_bounds = false; - reg->idx_count = idx_count; - reg->dimension = VSIR_DIMENSION_SCALAR; - reg->alignment = 0; + struct hull_flattener flattener = {program->instructions}; + struct vkd3d_shader_instruction_array *instructions; + struct shader_phase_location_array locations; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + + instructions = &flattener.instructions; + + flattener.phase = VKD3DSIH_INVALID; + for (i = 0, locations.count = 0; i < instructions->count; ++i) + flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + + if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) + return result; + + if (flattener.phase != VKD3DSIH_INVALID) + { + if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); + } + + program->instructions = flattener.instructions; + return result; } -void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) +struct control_point_normaliser { - vsir_register_init(¶m->reg, reg_type, data_type, idx_count); - param->swizzle = 0; - param->modifiers = VKD3DSPSM_NONE; + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_opcode phase; + struct vkd3d_shader_src_param *outpointid_param; +}; + +static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } -void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) -{ - vsir_register_init(¶m->reg, reg_type, data_type, idx_count); - param->write_mask = VKD3DSP_WRITEMASK_0; - param->modifiers = VKD3DSPDM_NONE; - param->shift = 0; -} - -void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) -{ - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); - param->reg.dimension = VSIR_DIMENSION_NONE; - param->reg.idx[0].offset = label_id; -} - -static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - src->reg.idx[0].offset = idx; -} - -static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); - src->reg.idx[0].offset = idx; -} - -static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -{ - vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; -} - -static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -{ - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; -} - -static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) -{ - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - dst->reg.idx[0].offset = idx; - dst->write_mask = VKD3DSP_WRITEMASK_0; -} - -static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src->reg.idx[0].offset = idx; -} - -static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - src->reg.idx[0].offset = idx; -} - -static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) -{ - vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); - src->reg.u.immconst_u32[0] = value; -} - -static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -{ - vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); - src->reg.idx[0].offset = idx; -} - -void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode) -{ - memset(ins, 0, sizeof(*ins)); - ins->location = *location; - ins->opcode = opcode; -} - -static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, - const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program) -{ - struct vkd3d_shader_src_param *src_param; - - if (!(src_param = vsir_program_get_src_params(program, 1))) - return false; - - vsir_src_param_init_label(src_param, label_id); - - vsir_instruction_init(ins, location, VKD3DSIH_LABEL); - ins->src = src_param; - ins->src_count = 1; - - return true; -} - -static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) -{ - struct hull_flattener flattener = {*src_instructions}; - struct vkd3d_shader_instruction_array *instructions; - struct shader_phase_location_array locations; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - - instructions = &flattener.instructions; - - flattener.phase = VKD3DSIH_INVALID; - for (i = 0, locations.count = 0; i < instructions->count; ++i) - flattener_eliminate_phase_related_dcls(&flattener, i, &locations); - - if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) - return result; - - if (flattener.phase != VKD3DSIH_INVALID) - { - if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); - } - - *src_instructions = flattener.instructions; - return result; -} - -struct control_point_normaliser -{ - struct vkd3d_shader_instruction_array instructions; - enum vkd3d_shader_opcode phase; - struct vkd3d_shader_src_param *outpointid_param; -}; - -static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) -{ - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; -} - -struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions) +struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program) { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; struct vkd3d_shader_src_param *rel_addr; if (instructions->outpointid_param) @@ -991,7 +1101,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p } static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( - struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) + struct vsir_program *program, struct vsir_transformation_context *ctx) { struct vkd3d_shader_instruction_array *instructions; struct control_point_normaliser normaliser; @@ -1001,12 +1111,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j; - if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) + if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) { ERR("Failed to allocate src param.\n"); return VKD3D_ERROR_OUT_OF_MEMORY; } - normaliser.instructions = *src_instructions; + normaliser.instructions = program->instructions; instructions = &normaliser.instructions; normaliser.phase = VKD3DSIH_INVALID; @@ -1043,22 +1153,22 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i input_control_point_count = ins->declaration.count; break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; return VKD3D_OK; case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: /* ins may be relocated if the instruction array expands. */ location = ins->location; - ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, + ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, input_control_point_count, i, &location); - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; return ret; default: break; } } - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; return VKD3D_OK; } @@ -1098,8 +1208,8 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } -static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, - unsigned int reg_idx, unsigned int write_mask) +static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) { unsigned int i, base_write_mask; @@ -1109,7 +1219,8 @@ static unsigned int shader_signature_find_element_for_reg(const struct shader_si if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx && (e->mask & write_mask) == write_mask) { - return i; + *element_idx = i; + return true; } } @@ -1119,15 +1230,20 @@ static unsigned int shader_signature_find_element_for_reg(const struct shader_si reg_idx, write_mask); base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); if (base_write_mask != write_mask) - return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask); + return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask, element_idx); - vkd3d_unreachable(); + return false; } struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask) { - return &signature->elements[shader_signature_find_element_for_reg(signature, reg_idx, write_mask)]; + unsigned int element_idx; + + if (shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + return &signature->elements[element_idx]; + + return NULL; } static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], @@ -1181,9 +1297,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; const struct vkd3d_shader_register *reg = &range->dst.reg; - unsigned int reg_idx, write_mask, element_idx; const struct shader_signature *signature; uint8_t (*range_map)[VKD3D_VEC4_SIZE]; + struct signature_element *element; + unsigned int reg_idx, write_mask; switch (reg->type) { @@ -1215,9 +1332,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, reg_idx = reg->idx[reg->idx_count - 1].offset; write_mask = range->dst.write_mask; - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); - range_map_set_register_range(range_map, reg_idx, range->register_count, - signature->elements[element_idx].mask, true); + element = vsir_signature_find_element_for_reg(signature, reg_idx, write_mask); + range_map_set_register_range(range_map, reg_idx, range->register_count, element->mask, true); } static int signature_element_mask_compare(const void *a, const void *b) @@ -1388,6 +1504,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map else e->interpolation_mode = f->interpolation_mode; } + + vkd3d_free((void *)f->semantic_name); } } element_count = new_count; @@ -1415,6 +1533,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); e->register_count = register_count; e->mask = signature_element_range_expand_mask(e, register_count, range_map); + + for (j = 1; j < register_count; ++j) + { + f = &elements[i + j]; + vkd3d_free((void *)f->semantic_name); + } } } element_count = new_count; @@ -1530,7 +1654,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par id_idx = reg->idx_count - 1; write_mask = dst_param->write_mask; - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + vkd3d_unreachable(); e = &signature->elements[element_idx]; dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask); @@ -1653,7 +1778,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par id_idx = reg->idx_count - 1; write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0); - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + vkd3d_unreachable(); e = &signature->elements[element_idx]; if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) @@ -1751,12 +1877,12 @@ static bool use_flat_interpolation(const struct vsir_program *program, } static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; struct io_normaliser normaliser = {program->instructions}; struct vkd3d_shader_instruction *ins; - bool has_control_point_phase; - unsigned int i, j; + unsigned int i; normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; @@ -1765,7 +1891,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_signature = &program->output_signature; normaliser.patch_constant_signature = &program->patch_constant_signature; - for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { ins = &program->instructions.elements[i]; @@ -1779,8 +1905,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program vkd3d_shader_instruction_make_nop(ins); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: - has_control_point_phase = true; - /* fall through */ case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: normaliser.phase = ins->opcode; @@ -1790,22 +1914,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program } } - if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) - { - /* Inputs and outputs must match for the default phase, so merge ranges must match too. */ - for (i = 0; i < MAX_REG_OUTPUT; ++i) - { - for (j = 0; j < VKD3D_VEC4_SIZE; ++j) - { - if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) - normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; - else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) - normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; - else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); - } - } - } - if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) @@ -1918,7 +2026,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par param->reg.idx_count = 3; } -static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_program *program) +static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_program *program, + struct vsir_transformation_context *ctx) { struct flat_constants_normaliser normaliser = {0}; unsigned int i, j; @@ -1957,7 +2066,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ return VKD3D_OK; } -static void remove_dead_code(struct vsir_program *program) +static enum vkd3d_result vsir_program_remove_dead_code(struct vsir_program *program, + struct vsir_transformation_context *ctx) { size_t i, depth = 0; bool dead = false; @@ -2045,103 +2155,6 @@ static void remove_dead_code(struct vsir_program *program) break; } } -} - -static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) -{ - unsigned int i; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - - switch (ins->opcode) - { - case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 3); - - ins->opcode = VKD3DSIH_SAMPLE; - - srcs[0] = ins->src[0]; - - srcs[1].reg.type = VKD3DSPR_RESOURCE; - srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx_count = 2; - srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; - srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; - srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; - - srcs[2].reg.type = VKD3DSPR_SAMPLER; - srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx_count = 2; - srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; - - ins->src = srcs; - ins->src_count = 3; - break; - - case VKD3DSIH_TEXLDD: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 5); - - ins->opcode = VKD3DSIH_SAMPLE_GRAD; - - srcs[0] = ins->src[0]; - - srcs[1].reg.type = VKD3DSPR_RESOURCE; - srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx_count = 2; - srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; - srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; - srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; - - srcs[2].reg.type = VKD3DSPR_SAMPLER; - srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[2].reg.idx_count = 2; - srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; - - srcs[3] = ins->src[2]; - srcs[4] = ins->src[3]; - - ins->src = srcs; - ins->src_count = 5; - break; - - case VKD3DSIH_TEXBEM: - case VKD3DSIH_TEXBEML: - case VKD3DSIH_TEXCOORD: - case VKD3DSIH_TEXDEPTH: - case VKD3DSIH_TEXDP3: - case VKD3DSIH_TEXDP3TEX: - case VKD3DSIH_TEXLDL: - case VKD3DSIH_TEXM3x2PAD: - case VKD3DSIH_TEXM3x2TEX: - case VKD3DSIH_TEXM3x3DIFF: - case VKD3DSIH_TEXM3x3PAD: - case VKD3DSIH_TEXM3x3SPEC: - case VKD3DSIH_TEXM3x3TEX: - case VKD3DSIH_TEXM3x3VSPEC: - case VKD3DSIH_TEXREG2AR: - case VKD3DSIH_TEXREG2GB: - case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "Combined sampler instruction %#x.", ins->opcode); - return VKD3D_ERROR_NOT_IMPLEMENTED; - - default: - break; - } - } return VKD3D_OK; } @@ -2789,11 +2802,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte } static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; struct cf_flattener flattener = {.program = program}; enum vkd3d_result result; + VKD3D_ASSERT(program->cf_type == VSIR_CF_STRUCTURED); + if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { vkd3d_free(program->instructions.elements); @@ -2801,6 +2817,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; program->block_count = flattener.block_id; + program->cf_type = VSIR_CF_BLOCKS; } else { @@ -2860,13 +2877,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i return true; } -static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) +static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vsir_program *program, + struct vsir_transformation_context *ctx) { unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label; size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0; struct vkd3d_shader_instruction *instructions = NULL; struct lower_switch_to_if_ladder_block_mapping *block_map = NULL; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) goto fail; @@ -3050,7 +3070,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl vkd3d_free(block_info); } -static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) +static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program, + struct vsir_transformation_context *ctx) { size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; struct ssas_to_temps_block_info *info, *block_info = NULL; @@ -3058,6 +3079,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ struct ssas_to_temps_alloc alloc = {0}; unsigned int current_label = 0; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { ERR("Failed to allocate block info array.\n"); @@ -5271,12 +5294,15 @@ out: } static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; struct vsir_cfg_emit_target target = {0}; enum vkd3d_result ret; size_t i; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + target.jump_target_temp_idx = program->temp_count; target.temp_count = program->temp_count + 1; @@ -5324,6 +5350,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, program->instructions.capacity = target.ins_capacity; program->instructions.count = target.ins_count; program->temp_count = target.temp_count; + program->cf_type = VSIR_CF_STRUCTURED; return VKD3D_OK; @@ -5451,11 +5478,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f } static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; enum vkd3d_result ret; size_t i; + VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS); + for (i = 0; i < program->instructions.count;) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; @@ -5596,8 +5626,9 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr } static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { + struct vkd3d_shader_message_context *message_context = ctx->message_context; const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; static const struct vkd3d_shader_location no_loc; enum vkd3d_shader_comparison_func compare_func; @@ -5687,12 +5718,6 @@ struct validation_context enum vkd3d_result status; bool dcl_temps_found; enum vkd3d_shader_opcode phase; - enum cf_type - { - CF_TYPE_UNKNOWN = 0, - CF_TYPE_STRUCTURED, - CF_TYPE_BLOCKS, - } cf_type; bool inside_block; struct validation_context_temp_data @@ -5731,14 +5756,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c if (ctx->invalid_instruction_idx) { vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); - ERR("VSIR validation error: %s\n", buf.buffer); + WARN("VSIR validation error: %s\n", buf.buffer); } else { const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; vkd3d_shader_error(ctx->message_context, &ins->location, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); - ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); } vkd3d_string_buffer_cleanup(&buf); @@ -6105,13 +6130,13 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, return true; } -static const char *name_from_cf_type(enum cf_type type) +static const char *name_from_cf_type(enum vsir_control_flow_type type) { switch (type) { - case CF_TYPE_STRUCTURED: + case VSIR_CF_STRUCTURED: return "structured"; - case CF_TYPE_BLOCKS: + case VSIR_CF_BLOCKS: return "block-based"; default: vkd3d_unreachable(); @@ -6119,437 +6144,510 @@ static const char *name_from_cf_type(enum cf_type type) } static void vsir_validate_cf_type(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) + const struct vkd3d_shader_instruction *instruction, enum vsir_control_flow_type expected_type) { - VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); - VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); - if (ctx->cf_type != expected_type) + if (ctx->program->cf_type != expected_type) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", - instruction->opcode, name_from_cf_type(ctx->cf_type)); + instruction->opcode, name_from_cf_type(ctx->program->cf_type)); } -static void vsir_validate_instruction(struct validation_context *ctx) +static void vsir_validator_push_block(struct validation_context *ctx, enum vkd3d_shader_opcode opcode) { - const struct vkd3d_shader_version *version = &ctx->program->shader_version; - const struct vkd3d_shader_instruction *instruction; - size_t i; - - instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; - - for (i = 0; i < instruction->dst_count; ++i) - vsir_validate_dst_param(ctx, &instruction->dst[i]); - - for (i = 0; i < instruction->src_count; ++i) - vsir_validate_src_param(ctx, &instruction->src[i]); - - if (instruction->opcode >= VKD3DSIH_INVALID) + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", - instruction->opcode); + ctx->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; } + ctx->blocks[ctx->depth++] = opcode; +} - switch (instruction->opcode) - { - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (version->type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Phase instruction %#x is only valid in a hull shader.", - instruction->opcode); - if (ctx->depth != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, - "Phase instruction %#x must appear to top level.", - instruction->opcode); - ctx->phase = instruction->opcode; - ctx->dcl_temps_found = false; - return; +static void vsir_validate_hull_shader_phase(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Phase instruction %#x is only valid in a hull shader.", + instruction->opcode); + if (ctx->depth != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Phase instruction %#x must appear to top level.", + instruction->opcode); + ctx->phase = instruction->opcode; + ctx->dcl_temps_found = false; +} - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - /* Exclude non-finite values. */ - if (!(instruction->declaration.max_tessellation_factor >= 1.0f - && instruction->declaration.max_tessellation_factor <= 64.0f)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", - instruction->declaration.max_tessellation_factor); - return; +static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + size_t i; - case VKD3DSIH_DCL_INPUT_PRIMITIVE: - if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED - || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", - instruction->declaration.primitive_type.type); - return; + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + vsir_validate_dst_count(ctx, instruction, 0); - case VKD3DSIH_DCL_VERTICES_OUT: - if (instruction->declaration.count > 1024) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", - instruction->declaration.count); - return; + if (!vsir_validate_src_min_count(ctx, instruction, 1)) + return; - case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: - if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED - || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", - instruction->declaration.primitive_type.type); - return; + if (vsir_register_is_label(&instruction->src[0].reg)) + { + /* Unconditional branch: parameters are jump label, + * optional merge label, optional continue label. */ + vsir_validate_src_max_count(ctx, instruction, 3); - case VKD3DSIH_DCL_GS_INSTANCES: - if (!instruction->declaration.count || instruction->declaration.count > 32) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", - instruction->declaration.count); - return; + for (i = 0; i < instruction->src_count; ++i) + { + if (!vsir_register_is_label(&instruction->src[i].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", + instruction->src[i].reg.type); + } + } + else + { + /* Conditional branch: parameters are condition, true + * jump label, false jump label, optional merge label, + * optional continue label. */ + vsir_validate_src_min_count(ctx, instruction, 3); + vsir_validate_src_max_count(ctx, instruction, 5); - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - if (!instruction->declaration.count || instruction->declaration.count > 32) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", - instruction->declaration.count); - return; + for (i = 1; i < instruction->src_count; ++i) + { + if (!vsir_register_is_label(&instruction->src[i].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", + instruction->src[i].reg.type); + } + } - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID - || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); - return; + ctx->inside_block = false; +} - case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: - if (!instruction->declaration.tessellator_output_primitive - || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); - return; +static void vsir_validate_dcl_gs_instances(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.count || instruction->declaration.count > 32) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", + instruction->declaration.count); +} - case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: - if (!instruction->declaration.tessellator_partitioning - || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); - return; +static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + /* Exclude non-finite values. */ + if (!(instruction->declaration.max_tessellation_factor >= 1.0f + && instruction->declaration.max_tessellation_factor <= 64.0f)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Max tessellation factor %f is invalid.", + instruction->declaration.max_tessellation_factor); +} - default: - break; - } +static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", + instruction->declaration.primitive_type.type); +} - /* Only DCL instructions may occur outside hull shader phases. */ - if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL - && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", - instruction->opcode); +static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.count || instruction->declaration.count > 32) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Output control point count %u is invalid.", + instruction->declaration.count); +} - /* We support two different control flow types in shaders: - * block-based, like DXIL and SPIR-V, and structured, like D3DBC - * and TPF. The shader is detected as block-based when its first - * instruction, except for DCL_* and phases, is a LABEL. Currently - * we mandate that each shader is either purely block-based or - * purely structured. In principle we could allow structured - * constructs in a block, provided they are confined in a single - * block, but need for that hasn't arisen yet, so we don't. */ - if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) - { - if (instruction->opcode == VKD3DSIH_LABEL) - ctx->cf_type = CF_TYPE_BLOCKS; - else - ctx->cf_type = CF_TYPE_STRUCTURED; - } +static void vsir_validate_dcl_output_topology(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", + instruction->declaration.primitive_type.type); +} + +static void vsir_validate_dcl_temps(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (ctx->dcl_temps_found) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, + "Duplicate DCL_TEMPS instruction."); + if (instruction->declaration.count > ctx->program->temp_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, + "Invalid DCL_TEMPS count %u, expected at most %u.", + instruction->declaration.count, ctx->program->temp_count); + ctx->dcl_temps_found = true; +} + +static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID + || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); +} + +static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.tessellator_output_primitive + || instruction->declaration.tessellator_output_primitive + > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator output primitive %#x is invalid.", + instruction->declaration.tessellator_output_primitive); +} + +static void vsir_validate_dcl_tessellator_partitioning(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (!instruction->declaration.tessellator_partitioning + || instruction->declaration.tessellator_partitioning + > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator partitioning %#x is invalid.", + instruction->declaration.tessellator_partitioning); +} - if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) +static void vsir_validate_dcl_vertices_out(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + if (instruction->declaration.count > 1024) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", + instruction->declaration.count); +} + +static void vsir_validate_else(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ELSE instruction doesn't terminate IF block."); + else + ctx->blocks[ctx->depth - 1] = VKD3DSIH_ELSE; +} + +static void vsir_validate_endif(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF + && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDIF instruction doesn't terminate IF/ELSE block."); + else + --ctx->depth; +} + +static void vsir_validate_endloop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDLOOP instruction doesn't terminate LOOP block."); + else + --ctx->depth; +} + +static void vsir_validate_endrep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDREP instruction doesn't terminate REP block."); + else + --ctx->depth; +} + +static void vsir_validate_endswitch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "ENDSWITCH instruction doesn't terminate SWITCH block."); + else + --ctx->depth; +} + +static void vsir_validate_if(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_IF); +} + +static void vsir_validate_ifc(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_IF); +} + +static void vsir_validate_label(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register of type %#x in a LABEL instruction, expected LABEL.", + instruction->src[0].reg.type); + + if (ctx->inside_block) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid LABEL instruction inside a block."); + ctx->inside_block = true; +} + +static void vsir_validate_loop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validate_src_count(ctx, instruction, ctx->program->shader_version.major <= 3 ? 2 : 0); + vsir_validator_push_block(ctx, VKD3DSIH_LOOP); +} + +static void vsir_validate_nop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ +} + +static void vsir_validate_phi(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + unsigned int i, incoming_count; + + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + + vsir_validate_src_min_count(ctx, instruction, 2); + + if (instruction->src_count % 2 != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for a PHI instruction, it must be an even number.", + instruction->src_count); + incoming_count = instruction->src_count / 2; + + for (i = 0; i < incoming_count; ++i) { - switch (instruction->opcode) - { - case VKD3DSIH_LABEL: - if (ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block."); - ctx->inside_block = true; - break; + unsigned int value_idx = 2 * i; + unsigned int label_idx = 2 * i + 1; - case VKD3DSIH_RET: - case VKD3DSIH_BRANCH: - case VKD3DSIH_SWITCH_MONOLITHIC: - if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, - "Invalid instruction %#x outside any block.", - instruction->opcode); - ctx->inside_block = false; - break; + if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) + && !register_is_ssa(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for incoming %u of type %#x in PHI instruction, " + "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - default: - if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, - "Invalid instruction %#x outside any block.", - instruction->opcode); - break; - } + if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.", + instruction->src[value_idx].reg.dimension, i); + + if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid label register for case %u of type %#x in PHI instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); } - switch (instruction->opcode) - { - case VKD3DSIH_DCL_TEMPS: - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->dcl_temps_found) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction."); - if (instruction->declaration.count > ctx->program->temp_count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, - "Invalid DCL_TEMPS count %u, expected at most %u.", - instruction->declaration.count, ctx->program->temp_count); - ctx->dcl_temps_found = true; - break; + if (instruction->dst_count < 1) + return; - case VKD3DSIH_IF: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; + if (!register_is_ssa(&instruction->dst[0].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid destination of type %#x in PHI instruction, expected SSA.", + instruction->dst[0].reg.type); - case VKD3DSIH_IFC: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 2); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = VKD3DSIH_IF; - break; + if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid destination dimension %#x in PHI instruction, expected scalar.", + instruction->dst[0].reg.dimension); - case VKD3DSIH_ELSE: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); - else - ctx->blocks[ctx->depth - 1] = instruction->opcode; - break; + if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, + "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].modifiers); - case VKD3DSIH_ENDIF: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block."); - else - --ctx->depth; - break; + if (instruction->dst[0].shift != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, + "Invalid shift %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].shift); +} - case VKD3DSIH_LOOP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; +static void vsir_validate_rep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_REP); +} - case VKD3DSIH_ENDLOOP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block."); - else - --ctx->depth; - break; +static void vsir_validate_ret(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + ctx->inside_block = false; +} - case VKD3DSIH_REP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; +static void vsir_validate_switch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_SWITCH); +} - case VKD3DSIH_ENDREP: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block."); - else - --ctx->depth; - break; +static void vsir_validate_switch_monolithic(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + unsigned int i, case_count; - case VKD3DSIH_SWITCH: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; - ctx->blocks[ctx->depth++] = instruction->opcode; - break; + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); - case VKD3DSIH_ENDSWITCH: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block."); - else - --ctx->depth; - break; + /* Parameters are source, default label, merge label and + * then pairs of constant value and case label. */ - case VKD3DSIH_RET: - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - break; + if (!vsir_validate_src_min_count(ctx, instruction, 3)) + return; - case VKD3DSIH_LABEL: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 1); - if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register of type %#x in a LABEL instruction, expected LABEL.", - instruction->src[0].reg.type); - break; + if (instruction->src_count % 2 != 1) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", + instruction->src_count); - case VKD3DSIH_BRANCH: - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - if (!vsir_validate_src_min_count(ctx, instruction, 1)) - break; - if (vsir_register_is_label(&instruction->src[0].reg)) - { - /* Unconditional branch: parameters are jump label, - * optional merge label, optional continue label. */ - vsir_validate_src_max_count(ctx, instruction, 3); + if (!vsir_register_is_label(&instruction->src[1].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", + instruction->src[1].reg.type); - for (i = 0; i < instruction->src_count; ++i) - { - if (!vsir_register_is_label(&instruction->src[i].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", - instruction->src[i].reg.type); - } - } - else - { - /* Conditional branch: parameters are condition, true - * jump label, false jump label, optional merge label, - * optional continue label. */ - vsir_validate_src_min_count(ctx, instruction, 3); - vsir_validate_src_max_count(ctx, instruction, 5); + if (!vsir_register_is_label(&instruction->src[2].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", + instruction->src[2].reg.type); - for (i = 1; i < instruction->src_count; ++i) - { - if (!vsir_register_is_label(&instruction->src[i].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", - instruction->src[i].reg.type); - } - } - break; + case_count = (instruction->src_count - 3) / 2; - case VKD3DSIH_SWITCH_MONOLITHIC: - { - unsigned int case_count; + for (i = 0; i < case_count; ++i) + { + unsigned int value_idx = 3 + 2 * i; + unsigned int label_idx = 3 + 2 * i + 1; - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - /* Parameters are source, default label, merge label and - * then pairs of constant value and case label. */ - if (!vsir_validate_src_min_count(ctx, instruction, 3)) - break; - if (instruction->src_count % 2 != 1) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", - instruction->src_count); + if (!register_is_constant(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for case %u of type %#x in monolithic SWITCH instruction, " + "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - if (!vsir_register_is_label(&instruction->src[1].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", - instruction->src[1].reg.type); + if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid label register for case %u of type %#x in monolithic SWITCH instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); + } - if (!vsir_register_is_label(&instruction->src[2].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", - instruction->src[2].reg.type); + ctx->inside_block = false; +} - case_count = (instruction->src_count - 3) / 2; +struct vsir_validator_instruction_desc +{ + unsigned int dst_param_count; + unsigned int src_param_count; + void (*validate)(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction); +}; - for (i = 0; i < case_count; ++i) - { - unsigned int value_idx = 3 + 2 * i; - unsigned int label_idx = 3 + 2 * i + 1; - - if (!register_is_constant(&instruction->src[value_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, " - "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - - if (!vsir_register_is_label(&instruction->src[label_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, " - "expected LABEL.", i, instruction->src[value_idx].reg.type); - } - break; +static const struct vsir_validator_instruction_desc vsir_validator_instructions[] = +{ + [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, + [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_FORK_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, + [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, + [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, + [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, + [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, + [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, + [VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_tessellator_output_primitive}, + [VKD3DSIH_DCL_TESSELLATOR_PARTITIONING] = {0, 0, vsir_validate_dcl_tessellator_partitioning}, + [VKD3DSIH_DCL_VERTICES_OUT] = {0, 0, vsir_validate_dcl_vertices_out}, + [VKD3DSIH_ELSE] = {0, 0, vsir_validate_else}, + [VKD3DSIH_ENDIF] = {0, 0, vsir_validate_endif}, + [VKD3DSIH_ENDLOOP] = {0, 0, vsir_validate_endloop}, + [VKD3DSIH_ENDREP] = {0, 0, vsir_validate_endrep}, + [VKD3DSIH_ENDSWITCH] = {0, 0, vsir_validate_endswitch}, + [VKD3DSIH_IF] = {0, 1, vsir_validate_if}, + [VKD3DSIH_IFC] = {0, 2, vsir_validate_ifc}, + [VKD3DSIH_LABEL] = {0, 1, vsir_validate_label}, + [VKD3DSIH_LOOP] = {0, ~0u, vsir_validate_loop}, + [VKD3DSIH_NOP] = {0, 0, vsir_validate_nop}, + [VKD3DSIH_PHI] = {1, ~0u, vsir_validate_phi}, + [VKD3DSIH_REP] = {0, 1, vsir_validate_rep}, + [VKD3DSIH_RET] = {0, 0, vsir_validate_ret}, + [VKD3DSIH_SWITCH] = {0, 1, vsir_validate_switch}, + [VKD3DSIH_SWITCH_MONOLITHIC] = {0, ~0u, vsir_validate_switch_monolithic}, +}; + +static void vsir_validate_instruction(struct validation_context *ctx) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + const struct vkd3d_shader_instruction *instruction; + size_t i; + + instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; + + for (i = 0; i < instruction->dst_count; ++i) + vsir_validate_dst_param(ctx, &instruction->dst[i]); + + for (i = 0; i < instruction->src_count; ++i) + vsir_validate_src_param(ctx, &instruction->src[i]); + + if (instruction->opcode >= VKD3DSIH_INVALID) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", + instruction->opcode); + } + + if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) + { + switch (instruction->opcode) + { + case VKD3DSIH_NOP: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + break; + + default: + if (!vsir_instruction_is_dcl(instruction)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", + instruction->opcode); + break; } + } - case VKD3DSIH_PHI: + if (ctx->program->cf_type == VSIR_CF_BLOCKS && !ctx->inside_block) + { + switch (instruction->opcode) { - unsigned int incoming_count; - - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 1); - vsir_validate_src_min_count(ctx, instruction, 2); - if (instruction->src_count % 2 != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for a PHI instruction, it must be an even number.", - instruction->src_count); - incoming_count = instruction->src_count / 2; - - if (!register_is_ssa(&instruction->dst[0].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid destination of type %#x in PHI instruction, expected SSA.", - instruction->dst[0].reg.type); + case VKD3DSIH_NOP: + case VKD3DSIH_LABEL: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + break; - if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, - "Invalid destination dimension %#x in PHI instruction, expected scalar.", - instruction->dst[0].reg.dimension); + default: + if (!vsir_instruction_is_dcl(instruction)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid instruction %#x outside any block.", + instruction->opcode); + break; + } + } - if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, - "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", - instruction->dst[0].modifiers); + if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions)) + { + const struct vsir_validator_instruction_desc *desc; - if (instruction->dst[0].shift != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, - "Invalid shift %#x for the destination of a PHI instruction, expected none.", - instruction->dst[0].shift); + desc = &vsir_validator_instructions[instruction->opcode]; - for (i = 0; i < incoming_count; ++i) - { - unsigned int value_idx = 2 * i; - unsigned int label_idx = 2 * i + 1; - - if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) - && !register_is_ssa(&instruction->src[value_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid value register for incoming %zu of type %#x in PHI instruction, " - "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - - if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, - "Invalid value dimension %#x for incoming %zu in PHI instruction, expected scalar.", - instruction->src[value_idx].reg.dimension, i); - - if (!vsir_register_is_label(&instruction->src[label_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid label register for case %zu of type %#x in PHI instruction, " - "expected LABEL.", i, instruction->src[value_idx].reg.type); - } - break; + if (desc->validate) + { + if (desc->dst_param_count != ~0u) + vsir_validate_dst_count(ctx, instruction, desc->dst_param_count); + if (desc->src_param_count != ~0u) + vsir_validate_src_count(ctx, instruction, desc->src_param_count); + desc->validate(ctx, instruction); } - - default: - break; } } @@ -6575,7 +6673,8 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; - for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) + for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count + && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx) vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; @@ -6610,74 +6709,74 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +#define vsir_transform(ctx, step) vsir_transform_(ctx, #step, step) +static void vsir_transform_( + struct vsir_transformation_context *ctx, const char *step_name, + enum vkd3d_result (*step)(struct vsir_program *program, struct vsir_transformation_context *ctx)) { - enum vkd3d_result result = VKD3D_OK; - - if ((result = vsir_program_lower_instructions(program, message_context)) < 0) - return result; + if (ctx->result < 0) + return; - if (program->shader_version.major >= 6) + if ((ctx->result = step(ctx->program, ctx)) < 0) { - if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) - return result; + WARN("Transformation \"%s\" failed with result %d.\n", step_name, ctx->result); + return; + } - if ((result = lower_switch_to_if_ladder(program)) < 0) - return result; + if ((ctx->result = vsir_program_validate(ctx->program, ctx->config_flags, + ctx->compile_info->source_name, ctx->message_context)) < 0) + { + WARN("Validation failed with result %d after transformation \"%s\".\n", ctx->result, step_name); + return; + } +} - if ((result = vsir_program_structurize(program, message_context)) < 0) - return result; +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx = + { + .result = VKD3D_OK, + .program = program, + .config_flags = config_flags, + .compile_info = compile_info, + .message_context = message_context, + }; - if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + vsir_transform(&ctx, vsir_program_lower_instructions); - if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) - return result; + if (program->shader_version.major >= 6) + { + vsir_transform(&ctx, vsir_program_materialise_phi_ssas_to_temps); + vsir_transform(&ctx, vsir_program_lower_switch_to_selection_ladder); + vsir_transform(&ctx, vsir_program_structurize); + vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs); + vsir_transform(&ctx, vsir_program_materialize_undominated_ssas_to_temps); } else { if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - { - if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) - return result; - } + vsir_transform(&ctx, vsir_program_remap_output_signature); if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) { - if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) - return result; - - if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, - &program->input_signature)) < 0) - return result; + vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); + vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); } - if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) - return result; - - if ((result = instruction_array_normalise_flat_constants(program)) < 0) - return result; - - remove_dead_code(program); - - if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) - return result; + vsir_transform(&ctx, vsir_program_normalise_io_registers); + vsir_transform(&ctx, vsir_program_normalise_flat_constants); + vsir_transform(&ctx, vsir_program_remove_dead_code); if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL - && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + && compile_info->target_type != VKD3D_SHADER_TARGET_MSL) + vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs); } - if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) - return result; + vsir_transform(&ctx, vsir_program_insert_alpha_test); if (TRACE_ON()) vkd3d_shader_trace(program); - if ((result = vsir_program_validate(program, config_flags, - compile_info->source_name, message_context)) < 0) - return result; - - return result; + return ctx.result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c new file mode 100644 index 00000000000..7d2e713cddc --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -0,0 +1,275 @@ +/* + * Copyright 2024 Feifan He for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +struct msl_src +{ + struct vkd3d_string_buffer *str; +}; + +struct msl_dst +{ + const struct vkd3d_shader_dst_param *vsir; + struct vkd3d_string_buffer *register_name; + struct vkd3d_string_buffer *mask; +}; + +struct msl_generator +{ + struct vsir_program *program; + struct vkd3d_string_buffer_cache string_buffers; + struct vkd3d_string_buffer *buffer; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; + unsigned int indent; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, + enum vkd3d_shader_error error, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vkd3d_shader_verror(gen->message_context, &gen->location, error, fmt, args); + va_end(args); +} + +static void msl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) +{ + vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); +} + +static void msl_print_register_name(struct vkd3d_string_buffer *buffer, + struct msl_generator *gen, const struct vkd3d_shader_register *reg) +{ + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled register type %#x.", reg->type); + vkd3d_string_buffer_printf(buffer, "", reg->type); +} + +static void msl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask) +{ + const char swizzle_chars[] = "xyzw"; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "."); + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (mask & (VKD3DSP_WRITEMASK_0 << i)) + vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); + } +} + +static void msl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) +{ + vkd3d_string_buffer_printf(buffer, "."); + if (write_mask & VKD3DSP_WRITEMASK_0) + vkd3d_string_buffer_printf(buffer, "x"); + if (write_mask & VKD3DSP_WRITEMASK_1) + vkd3d_string_buffer_printf(buffer, "y"); + if (write_mask & VKD3DSP_WRITEMASK_2) + vkd3d_string_buffer_printf(buffer, "z"); + if (write_mask & VKD3DSP_WRITEMASK_3) + vkd3d_string_buffer_printf(buffer, "w"); +} + +static void msl_src_cleanup(struct msl_src *src, struct vkd3d_string_buffer_cache *cache) +{ + vkd3d_string_buffer_release(cache, src->str); +} + +static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) +{ + const struct vkd3d_shader_register *reg = &vsir_src->reg; + + msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + + if (reg->non_uniform) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + if (vsir_src->modifiers) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + + msl_print_register_name(msl_src->str, gen, reg); + if (reg->dimension == VSIR_DIMENSION_VEC4) + msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); +} + +static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) +{ + vkd3d_string_buffer_release(cache, dst->mask); + vkd3d_string_buffer_release(cache, dst->register_name); +} + +static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, + const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) +{ + uint32_t write_mask = vsir_dst->write_mask; + + if (ins->flags & VKD3DSI_PRECISE_XYZW) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'precise' modifier."); + if (vsir_dst->reg.non_uniform) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + + msl_dst->vsir = vsir_dst; + msl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); + msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); + msl_print_write_mask(msl_dst->mask, write_mask); + + return write_mask; +} + +static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( + struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) +{ + va_list args; + + if (dst->vsir->shift) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); + if (dst->vsir->modifiers) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + + va_start(args, format); + vkd3d_string_buffer_vprintf(gen->buffer, format, args); + va_end(args); + + vkd3d_string_buffer_printf(gen->buffer, ";\n"); +} + +static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "/* */\n", ins->opcode); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled instruction %#x.", ins->opcode); +} + +static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + msl_print_assignment(gen, &dst, "%s", src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "return;\n"); +} + +static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + gen->location = ins->location; + + switch (ins->opcode) + { + case VKD3DSIH_NOP: + break; + case VKD3DSIH_MOV: + msl_mov(gen, ins); + break; + case VKD3DSIH_RET: + msl_ret(gen, ins); + break; + default: + msl_unhandled(gen, ins); + break; + } +} + +static void msl_generator_generate(struct msl_generator *gen) +{ + const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; + unsigned int i; + + MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + + vkd3d_string_buffer_printf(gen->buffer, "void shader_main()\n{\n"); + + ++gen->indent; + for (i = 0; i < instructions->count; ++i) + { + msl_handle_instruction(gen, &instructions->elements[i]); + } + + vkd3d_string_buffer_printf(gen->buffer, "}\n"); + + if (TRACE_ON()) + vkd3d_string_buffer_trace(gen->buffer); +} + +static void msl_generator_cleanup(struct msl_generator *gen) +{ + vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); + vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); +} + +static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + memset(gen, 0, sizeof(*gen)); + gen->program = program; + vkd3d_string_buffer_cache_init(&gen->string_buffers); + if (!(gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers))) + { + vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + gen->message_context = message_context; + + return VKD3D_OK; +} + +int msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct msl_generator generator; + int ret; + + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + + if ((ret = msl_generator_init(&generator, program, message_context)) < 0) + return ret; + msl_generator_generate(&generator); + msl_generator_cleanup(&generator); + + return VKD3D_ERROR_INVALID_SHADER; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 2b7455a5c30..7fc963192cf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 49979ab2491..7f1e0fea2c3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -97,15 +97,37 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co if (!(spvret = spvBinaryToText(context, spirv->code, spirv->size / sizeof(uint32_t), get_binary_to_text_options(formatting), &text, &diagnostic))) { - void *code = vkd3d_malloc(text->length); - if (code) + const char *p, *q, *end, *pad, *truncate; + struct vkd3d_string_buffer buffer; + size_t line_len; + + vkd3d_string_buffer_init(&buffer); + + for (p = text->str, end = p + text->length; p < end; p = q) { - memcpy(code, text->str, text->length); - out->size = text->length; - out->code = code; + if (!(q = memchr(p, '\n', end - p))) + q = end; + else + ++q; + + /* FIXME: Note that when colour output is enabled, we count colour + * escape codes towards the line length. It's possible to fix + * that, but not completely trivial. */ + for (pad = "", line_len = 100; q - p > line_len; line_len = 100 - strlen(pad)) + { + if (!(truncate = memchr(p + line_len, ' ', q - p - line_len))) + break; + vkd3d_string_buffer_printf(&buffer, "%s%.*s\n", pad, (int)(truncate - p), p); + p = truncate + 1; + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT) + pad = " "; + else + pad = " "; + } + vkd3d_string_buffer_printf(&buffer, "%s%.*s", pad, (int)(q - p), p); } - else - result = VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_code_from_string_buffer(out, &buffer); } else { @@ -277,6 +299,16 @@ static void vkd3d_spirv_stream_free(struct vkd3d_spirv_stream *stream) vkd3d_spirv_stream_clear(stream); } +static void vkd3d_shader_code_from_spirv_stream(struct vkd3d_shader_code *code, struct vkd3d_spirv_stream *stream) +{ + code->code = stream->words; + code->size = stream->word_count * sizeof(*stream->words); + + stream->words = NULL; + stream->capacity = 0; + stream->word_count = 0; +} + static size_t vkd3d_spirv_stream_current_location(struct vkd3d_spirv_stream *stream) { return stream->word_count; @@ -1996,9 +2028,7 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, { uint64_t capability_mask = builder->capability_mask; struct vkd3d_spirv_stream stream; - uint32_t *code; unsigned int i; - size_t size; vkd3d_spirv_stream_init(&stream); @@ -2053,26 +2083,20 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, if (builder->invocation_count) vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1); - vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream); - - vkd3d_spirv_stream_append(&stream, &builder->debug_stream); - vkd3d_spirv_stream_append(&stream, &builder->annotation_stream); - vkd3d_spirv_stream_append(&stream, &builder->global_stream); - vkd3d_spirv_stream_append(&stream, &builder->function_stream); - if (!(code = vkd3d_calloc(stream.word_count, sizeof(*code)))) + if (!vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->debug_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->annotation_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->global_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->function_stream)) { vkd3d_spirv_stream_free(&stream); return false; } - size = stream.word_count * sizeof(*code); - memcpy(code, stream.words, size); + vkd3d_shader_code_from_spirv_stream(spirv, &stream); vkd3d_spirv_stream_free(&stream); - spirv->code = code; - spirv->size = size; - return true; } @@ -6120,12 +6144,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, - bool is_uav, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) + const struct vkd3d_shader_descriptor_info1 *descriptor, bool is_uav_counter, + struct vkd3d_descriptor_variable_info *var_info) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct vkd3d_descriptor_binding_address binding_address; struct vkd3d_shader_descriptor_binding binding; - const struct vkd3d_shader_descriptor_info1 *d; uint32_t array_type_id, ptr_type_id, var_id; bool write_only = false, coherent = false; struct vkd3d_symbol symbol; @@ -6135,12 +6159,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * resource_type, is_uav_counter, &binding_address); var_info->binding_base_idx = binding_address.binding_base_idx; - if (is_uav) + if (descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && !is_uav_counter) { - d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + write_only = !(descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); /* ROVs are implicitly globally coherent. */ - coherent = d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); + coherent = descriptor->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); } if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u @@ -6194,11 +6217,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * } static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) + const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; const SpvStorageClass storage_class = SpvStorageClassUniform; + unsigned int size_in_bytes = descriptor->buffer_size; struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_shader_register reg; @@ -6206,7 +6230,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, unsigned int size; vsir_register_init(®, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3); - reg.idx[0].offset = register_id; + reg.idx[0].offset = descriptor->register_id; reg.idx[1].offset = range->first; reg.idx[2].offset = range->last; @@ -6239,7 +6263,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, - ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info); + ®, range, VKD3D_SHADER_RESOURCE_BUFFER, descriptor, false, &var_info); vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, @@ -6275,7 +6299,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi } static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, unsigned int register_id) + const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6285,7 +6309,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi uint32_t type_id, var_id; vsir_register_init(®, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); - reg.idx[0].offset = register_id; + reg.idx[0].offset = descriptor->register_id; vkd3d_symbol_make_sampler(®_symbol, ®); reg_symbol.info.sampler.range = *range; @@ -6295,8 +6319,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi return; type_id = vkd3d_spirv_get_op_type_sampler(builder); - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, - range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, + ®, range, VKD3D_SHADER_RESOURCE_NONE, descriptor, false, &var_info); vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, @@ -6461,21 +6485,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi } static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range, unsigned int register_id, - unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, - enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) + const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) { + bool raw = descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; + enum vkd3d_shader_resource_type resource_type = descriptor->resource_type; struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; + bool is_uav = descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + unsigned int structure_stride = descriptor->structure_stride / 4; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; SpvStorageClass storage_class = SpvStorageClassUniformConstant; uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; const struct vkd3d_spirv_resource_type *resource_type_info; + unsigned int sample_count = descriptor->sample_count; enum vkd3d_shader_component_type sampled_type; struct vkd3d_symbol resource_symbol; struct vkd3d_shader_register reg; vsir_register_init(®, is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_FLOAT, 1); - reg.idx[0].offset = register_id; + reg.idx[0].offset = descriptor->register_id; if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; @@ -6489,7 +6516,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp return; } - sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); + sampled_type = vkd3d_component_type_from_resource_data_type(descriptor->resource_data_type); if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { @@ -6520,16 +6547,12 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp resource_type_info, sampled_type, structure_stride || raw, 0); } - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, - range, resource_type, is_uav, false, &var_info); + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, + type_id, ®, range, resource_type, descriptor, false, &var_info); if (is_uav) { - const struct vkd3d_shader_descriptor_info1 *d; - - d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - - if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) + if (descriptor->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) { if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, @@ -6543,7 +6566,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp compiler->use_invocation_interlock = true; } - if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) + if (descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) { VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ @@ -6571,7 +6594,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, - type_id, ®, range, resource_type, false, true, &counter_var_info); + type_id, ®, range, resource_type, descriptor, true, &counter_var_info); } } @@ -10564,23 +10587,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c switch (descriptor->type) { case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: - spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); + spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: - spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); + spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor); break; case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: - spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, - descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, - descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); - break; - case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: - spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, - descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, - descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); + spirv_compiler_emit_resource_declaration(compiler, &range, descriptor); break; default: @@ -10600,7 +10616,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct enum vkd3d_result result = VKD3D_OK; unsigned int i, max_element_count; - if ((result = vsir_program_normalise(program, compiler->config_flags, + if ((result = vsir_program_transform(program, compiler->config_flags, compile_info, compiler->message_context)) < 0) return result; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 84f641cc316..884a2998d5b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -23,6 +23,7 @@ #include "hlsl.h" #include "vkd3d_shader_private.h" +#include "d3dcommon.h" #define SM4_MAX_SRC_COUNT 6 #define SM4_MAX_DST_COUNT 2 @@ -616,6 +617,47 @@ enum vkd3d_sm4_shader_data_type VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, }; +enum vkd3d_sm4_stat_field +{ + VKD3D_STAT_UNUSED = 0, + VKD3D_STAT_INSTR_COUNT, + VKD3D_STAT_MOV, + VKD3D_STAT_MOVC, + VKD3D_STAT_CONV, + VKD3D_STAT_FLOAT, + VKD3D_STAT_INT, + VKD3D_STAT_UINT, + VKD3D_STAT_EMIT, + VKD3D_STAT_CUT, + VKD3D_STAT_SAMPLE, + VKD3D_STAT_SAMPLE_C, + VKD3D_STAT_SAMPLE_GRAD, + VKD3D_STAT_SAMPLE_BIAS, + VKD3D_STAT_LOAD, + VKD3D_STAT_STORE, + VKD3D_STAT_DCL_VERTICES_OUT, + VKD3D_STAT_DCL_INPUT_PRIMITIVE, + VKD3D_STAT_DCL_OUTPUT_TOPOLOGY, + VKD3D_STAT_DCL_GS_INSTANCES, + VKD3D_STAT_BITWISE, + VKD3D_STAT_ATOMIC, + VKD3D_STAT_TESS_DOMAIN, + VKD3D_STAT_TESS_PARTITIONING, + VKD3D_STAT_TESS_OUTPUT_PRIMITIVE, + VKD3D_STAT_TESS_CONTROL_POINT_COUNT, + VKD3D_STAT_BARRIER, + VKD3D_STAT_LOD, + VKD3D_STAT_GATHER, + VKD3D_STAT_TEMPS, + VKD3D_STAT_COUNT, +}; + +struct vkd3d_sm4_stat_field_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_sm4_stat_field field; +}; + struct sm4_index_range { unsigned int index; @@ -634,6 +676,7 @@ struct vkd3d_sm4_lookup_tables const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; + const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; }; struct vkd3d_shader_sm4_parser @@ -1115,7 +1158,18 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u struct signature_element *e = vsir_signature_find_element_for_reg( &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - e->interpolation_mode = ins->flags; + if (!e) + { + WARN("No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL, + "No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + } + else + { + e->interpolation_mode = ins->flags; + } } } @@ -1130,7 +1184,18 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in struct signature_element *e = vsir_signature_find_element_for_reg( &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - e->interpolation_mode = ins->flags; + if (!e) + { + WARN("No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL, + "No matching signature element for input register %u with mask %#x.\n", + dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + } + else + { + e->interpolation_mode = ins->flags; + } } ins->declaration.register_semantic.sysval_semantic = *tokens; } @@ -1330,11 +1395,17 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, }; +struct sm4_stat +{ + uint32_t fields[VKD3D_STAT_COUNT]; +}; + struct tpf_writer { struct hlsl_ctx *ctx; struct vkd3d_bytecode_buffer *buffer; struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; }; static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) @@ -1662,6 +1733,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, }; + static const struct vkd3d_sm4_stat_field_info stat_field_table[] = + { + {VKD3D_SM4_OP_MOV, VKD3D_STAT_MOV}, + {VKD3D_SM4_OP_MOVC, VKD3D_STAT_MOVC}, + {VKD3D_SM5_OP_DMOV, VKD3D_STAT_MOV}, + {VKD3D_SM5_OP_DMOVC, VKD3D_STAT_MOVC}, + + {VKD3D_SM4_OP_ITOF, VKD3D_STAT_CONV}, + {VKD3D_SM4_OP_FTOI, VKD3D_STAT_CONV}, + {VKD3D_SM4_OP_FTOU, VKD3D_STAT_CONV}, + {VKD3D_SM4_OP_UTOF, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_DTOU, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_UTOD, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_DTOF, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_FTOD, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_DTOI, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_ITOD, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_F32TOF16, VKD3D_STAT_CONV}, + {VKD3D_SM5_OP_F16TOF32, VKD3D_STAT_CONV}, + + {VKD3D_SM4_OP_ADD, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DIV, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DP2, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DP3, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_DP4, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_EQ, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_EXP, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_FRC, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_GE, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_LT, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MAD, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MIN, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MAX, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_MUL, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_NE, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_NE, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_NI, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_PI, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_ROUND_Z, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_RSQ, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_SQRT, VKD3D_STAT_FLOAT}, + {VKD3D_SM4_OP_SINCOS, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_RCP, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DADD, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DMAX, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DMIN, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DMUL, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DEQ, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DGE, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DLT, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DNE, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DDIV, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DFMA, VKD3D_STAT_FLOAT}, + {VKD3D_SM5_OP_DRCP, VKD3D_STAT_FLOAT}, + + {VKD3D_SM4_OP_IADD, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IEQ, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IGE, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ILT, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMAD, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMAX, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMIN, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_IMUL, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_INE, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_INEG, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ISHL, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ISHR, VKD3D_STAT_INT}, + {VKD3D_SM4_OP_ITOF, VKD3D_STAT_INT}, + + {VKD3D_SM4_OP_UDIV, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_ULT, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UGE, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UMUL, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UMAX, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_UMIN, VKD3D_STAT_UINT}, + {VKD3D_SM4_OP_USHR, VKD3D_STAT_UINT}, + + {VKD3D_SM4_OP_EMIT, VKD3D_STAT_EMIT}, + {VKD3D_SM4_OP_CUT, VKD3D_STAT_CUT}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3D_STAT_EMIT}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3D_STAT_CUT}, + + {VKD3D_SM4_OP_SAMPLE, VKD3D_STAT_SAMPLE}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3D_STAT_SAMPLE}, + {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3D_STAT_SAMPLE}, + {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3D_STAT_SAMPLE}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3D_STAT_SAMPLE_C}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3D_STAT_SAMPLE_GRAD}, + {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3D_STAT_SAMPLE_GRAD}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3D_STAT_SAMPLE_BIAS}, + {VKD3D_SM4_OP_GATHER4, VKD3D_STAT_GATHER}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3D_STAT_GATHER}, + {VKD3D_SM4_OP_LOD, VKD3D_STAT_LOD}, + + {VKD3D_SM4_OP_LD, VKD3D_STAT_LOAD}, + {VKD3D_SM4_OP_LD2DMS, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_RAW, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD2DMS_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_RAW_S, VKD3D_STAT_LOAD}, + {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3D_STAT_LOAD}, + + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3D_STAT_STORE}, + {VKD3D_SM5_OP_STORE_RAW, VKD3D_STAT_STORE}, + {VKD3D_SM5_OP_STORE_STRUCTURED,VKD3D_STAT_STORE}, + + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3D_STAT_DCL_VERTICES_OUT}, + {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3D_STAT_DCL_INPUT_PRIMITIVE}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3D_STAT_DCL_OUTPUT_TOPOLOGY}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3D_STAT_DCL_GS_INSTANCES}, + + {VKD3D_SM4_OP_AND, VKD3D_STAT_BITWISE}, + {VKD3D_SM4_OP_NOT, VKD3D_STAT_BITWISE}, + {VKD3D_SM4_OP_OR, VKD3D_STAT_BITWISE}, + {VKD3D_SM4_OP_XOR, VKD3D_STAT_BITWISE}, + + {VKD3D_SM5_OP_ATOMIC_AND, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3D_STAT_ATOMIC}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3D_STAT_ATOMIC}, + + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3D_STAT_TESS_DOMAIN}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3D_STAT_TESS_PARTITIONING}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3D_STAT_TESS_OUTPUT_PRIMITIVE}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3D_STAT_TESS_CONTROL_POINT_COUNT}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3D_STAT_TESS_CONTROL_POINT_COUNT}, + + {VKD3D_SM5_OP_SYNC, VKD3D_STAT_BARRIER}, + + {VKD3D_SM4_OP_DCL_TEMPS, VKD3D_STAT_TEMPS}, + }; + memset(lookup, 0, sizeof(*lookup)); for (i = 0; i < ARRAY_SIZE(opcode_table); ++i) @@ -1678,12 +1904,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) lookup->register_type_info_from_sm4[info->sm4_type] = info; lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; } + + for (i = 0; i < ARRAY_SIZE(stat_field_table); ++i) + { + const struct vkd3d_sm4_stat_field_info *info = &stat_field_table[i]; + + lookup->stat_field_from_sm4[info->opcode] = info; + } } -static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct sm4_stat *stat, + struct vkd3d_bytecode_buffer *buffer) { tpf->ctx = ctx; tpf->buffer = buffer; + tpf->stat = stat; init_sm4_lookup_tables(&tpf->lookup); } @@ -1721,6 +1956,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( return register_type_info->default_src_swizzle_type; } +static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) +{ + const struct vkd3d_sm4_stat_field_info *field_info; + + if (sm4_opcode >= VKD3D_SM4_OP_COUNT || !(field_info = lookup->stat_field_from_sm4[sm4_opcode])) + return VKD3D_STAT_UNUSED; + return field_info->field; +} + static enum vkd3d_data_type map_data_type(char t) { switch (t) @@ -2553,7 +2798,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro version.minor = VKD3D_SM4_VERSION_MINOR(version_token); /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) + if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20, VSIR_CF_STRUCTURED)) return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; @@ -2782,8 +3027,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3D_NAME *usage) +bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, + struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output) { unsigned int i; @@ -2792,7 +3037,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant const char *name; bool output; enum vkd3d_shader_type shader_type; - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; } semantics[] = { @@ -2800,46 +3045,47 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_PRIMITIVE_ID}, - {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, - {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_VIEWPORT_ARRAY_INDEX}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, - {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_INSTANCE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VIEWPORT_ARRAY_INDEX}, + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + + {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, + {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, + {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, + {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + {"sv_sampleindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_SAMPLE_INDEX}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, + {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_COVERAGE}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_NONE}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VERTEX_ID}, + {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_INSTANCE_ID}, + + {"position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, + {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, + {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, }; - bool needs_compat_mapping = ascii_strncasecmp(semantic->name, "sv_", 3); + bool needs_compat_mapping = ascii_strncasecmp(hlsl_semantic->name, "sv_", 3); for (i = 0; i < ARRAY_SIZE(semantics); ++i) { - if (!ascii_strcasecmp(semantic->name, semantics[i].name) + if (!ascii_strcasecmp(hlsl_semantic->name, semantics[i].name) && output == semantics[i].output && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) && ctx->profile->type == semantics[i].shader_type) { - *usage = semantics[i].usage; + *semantic = semantics[i].semantic; return true; } } @@ -2847,7 +3093,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant if (!needs_compat_mapping) return false; - *usage = D3D_NAME_UNDEFINED; + *semantic = VKD3D_SHADER_SV_NONE; return true; } @@ -2880,16 +3126,16 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; + enum vkd3d_shader_sysval_semantic semantic; uint32_t usage_idx, reg_idx; - D3D_NAME usage; bool has_idx; if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) continue; - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + ret = sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); VKD3D_ASSERT(ret); - if (usage == ~0u) + if (semantic == ~0u) continue; usage_idx = var->semantic.index; @@ -2908,26 +3154,26 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, use_mask = 0xf ^ use_mask; /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ - if (usage >= 64) - usage = 0; + if (semantic >= VKD3D_SHADER_SV_TARGET) + semantic = VKD3D_SHADER_SV_NONE; put_u32(&buffer, 0); /* name */ put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); + put_u32(&buffer, semantic); switch (var->data_type->e.numeric.type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); + put_u32(&buffer, VKD3D_SHADER_COMPONENT_FLOAT); break; case HLSL_TYPE_INT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); + put_u32(&buffer, VKD3D_SHADER_COMPONENT_INT); break; case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); + put_u32(&buffer, VKD3D_SHADER_COMPONENT_UINT); break; default: @@ -2935,7 +3181,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid data type %s for semantic variable %s.", string->buffer, var->name); hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); + put_u32(&buffer, VKD3D_SHADER_COMPONENT_VOID); } put_u32(&buffer, reg_idx); put_u32(&buffer, vkd3d_make_u16(width, use_mask)); @@ -2944,25 +3190,25 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, i = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - const char *semantic = var->semantic.name; + enum vkd3d_shader_sysval_semantic semantic; + const char *name = var->semantic.name; size_t string_offset; - D3D_NAME usage; if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) continue; - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) + sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); + if (semantic == ~0u) continue; - if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) + if (semantic == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) string_offset = put_string(&buffer, "SV_Target"); - else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) + else if (semantic == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) string_offset = put_string(&buffer, "SV_Depth"); - else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) + else if (semantic == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) string_offset = put_string(&buffer, "SV_Position"); else - string_offset = put_string(&buffer, semantic); + string_offset = put_string(&buffer, name); set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); } @@ -3123,24 +3369,24 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) vkd3d_unreachable(); } -static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) { switch (type->e.resource.format->e.numeric.type) { case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; + return VKD3D_SM4_DATA_DOUBLE; case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - return D3D_RETURN_TYPE_FLOAT; + return VKD3D_SM4_DATA_FLOAT; case HLSL_TYPE_INT: - return D3D_RETURN_TYPE_SINT; + return VKD3D_SM4_DATA_INT; break; case HLSL_TYPE_BOOL: case HLSL_TYPE_UINT: - return D3D_RETURN_TYPE_UINT; + return VKD3D_SM4_DATA_UINT; default: vkd3d_unreachable(); @@ -3471,7 +3717,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { unsigned int dimx = resource->component_type->e.resource.format->dimx; - put_u32(&buffer, sm4_resource_format(resource->component_type)); + put_u32(&buffer, sm4_data_type(resource->component_type)); put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; @@ -4182,10 +4428,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk } } +static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct sm4_instruction *instr) +{ + enum vkd3d_shader_type shader_type = tpf->ctx->profile->type; + enum vkd3d_sm4_stat_field stat_field; + uint32_t opcode; + + ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT]; + + opcode = instr->opcode & VKD3D_SM4_OPCODE_MASK; + stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, opcode); + + switch (opcode) + { + case VKD3D_SM4_OP_DCL_TEMPS: + tpf->stat->fields[stat_field] = max(tpf->stat->fields[stat_field], instr->idx[0]); + break; + case VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY: + case VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE: + tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM4_PRIMITIVE_TYPE_MASK) + >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + break; + case VKD3D_SM4_OP_DCL_VERTICES_OUT: + case VKD3D_SM5_OP_DCL_GS_INSTANCES: + tpf->stat->fields[stat_field] = instr->idx[0]; + break; + case VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN: + case VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING: + case VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + break; + case VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT: + case VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT: + if ((shader_type == VKD3D_SHADER_TYPE_HULL && opcode == VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT) + || (shader_type == VKD3D_SHADER_TYPE_DOMAIN + && opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT)) + { + tpf->stat->fields[stat_field] = (instr->opcode & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; + } + break; + default: + ++tpf->stat->fields[stat_field]; + } +} + static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = instr->opcode | instr->extra_bits; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; unsigned int size, i, j; size_t token_position; @@ -4218,6 +4509,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t); token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); set_u32(buffer, token_position, token); + + sm4_update_stat_counters(tpf, instr); } static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, @@ -4348,7 +4641,7 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex .dsts[0].reg.idx_count = 1, .dst_count = 1, - .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx[0] = sm4_data_type(component_type) * 0x1111, .idx_count = 1, }; @@ -4412,7 +4705,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl { const struct hlsl_profile_info *profile = tpf->ctx->profile; const bool output = var->is_output_semantic; - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; bool has_idx; struct sm4_instruction instr = @@ -4445,22 +4738,23 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; - hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; + sysval_semantic_from_hlsl(&semantic, tpf->ctx, &var->semantic, output); + if (semantic == ~0u) + semantic = VKD3D_SHADER_SV_NONE; if (var->is_input_semantic) { - switch (usage) + switch (semantic) { - case D3D_NAME_UNDEFINED: + case VKD3D_SHADER_SV_NONE: instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; break; - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_PRIMITIVE_ID: + case VKD3D_SHADER_SV_VERTEX_ID: + case VKD3D_SHADER_SV_SAMPLE_INDEX: instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; break; @@ -4510,25 +4804,25 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl } else { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (semantic == VKD3D_SHADER_SV_NONE || profile->type == VKD3D_SHADER_TYPE_PIXEL) instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; else instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; } - switch (usage) + switch (semantic) { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: - case D3D_NAME_DEPTH_GREATER_EQUAL: - case D3D_NAME_DEPTH_LESS_EQUAL: - case D3D_NAME_TARGET: - case D3D_NAME_UNDEFINED: + case VKD3D_SHADER_SV_COVERAGE: + case VKD3D_SHADER_SV_DEPTH: + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: + case VKD3D_SHADER_SV_TARGET: + case VKD3D_SHADER_SV_NONE: break; default: instr.idx_count = 1; - instr.idx[0] = usage; + instr.idx[0] = semantic; break; } @@ -4577,6 +4871,17 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 write_sm4_instruction(tpf, &instr); } +static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t flags) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, + .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + static void write_sm4_ret(const struct tpf_writer *tpf) { struct sm4_instruction instr = @@ -5578,6 +5883,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); break; + case HLSL_OP3_MAD: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3); + break; + + default: + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + default: hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); } @@ -5998,8 +6320,8 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc } } -static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, + struct sm4_stat *stat, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; struct vkd3d_bytecode_buffer buffer = {0}; @@ -6024,7 +6346,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, VKD3D_SM4_LIB, }; - tpf_writer_init(&tpf, ctx, &buffer); + tpf_writer_init(&tpf, ctx, stat, &buffer); extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); @@ -6049,6 +6371,9 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, write_sm4_dcl_textures(&tpf, resource, true); } + if (entry_func->early_depth_test && profile->major_version >= 5) + write_sm4_dcl_global_flags(&tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) @@ -6110,14 +6435,64 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ - if (flags) + if (*flags) dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); else vkd3d_free(flags); } +static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, struct dxbc_writer *dxbc) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + + put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); + put_u32(&buffer, 0); /* Def count */ + put_u32(&buffer, 0); /* DCL count */ + put_u32(&buffer, stat->fields[VKD3D_STAT_FLOAT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_INT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_UINT]); + put_u32(&buffer, 0); /* Static flow control count */ + put_u32(&buffer, 0); /* Dynamic flow control count */ + put_u32(&buffer, 0); /* Macro instruction count */ + put_u32(&buffer, 0); /* Temp array count */ + put_u32(&buffer, 0); /* Array instr count */ + put_u32(&buffer, stat->fields[VKD3D_STAT_CUT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_EMIT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_LOAD]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_C]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_BIAS]); + put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_GRAD]); + put_u32(&buffer, stat->fields[VKD3D_STAT_MOV]); + put_u32(&buffer, stat->fields[VKD3D_STAT_MOVC]); + put_u32(&buffer, stat->fields[VKD3D_STAT_CONV]); + put_u32(&buffer, stat->fields[VKD3D_STAT_BITWISE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_INPUT_PRIMITIVE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_OUTPUT_TOPOLOGY]); + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_VERTICES_OUT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_GATHER]); + put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); + put_u32(&buffer, 0); /* Sample frequency */ + + if (hlsl_version_ge(ctx, 5, 0)) + { + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_OUTPUT_PRIMITIVE]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_PARTITIONING]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_DOMAIN]); + put_u32(&buffer, stat->fields[VKD3D_STAT_BARRIER]); + put_u32(&buffer, stat->fields[VKD3D_STAT_ATOMIC]); + put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); + } + + add_section(ctx, dxbc, TAG_STAT, &buffer); +} + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { + struct sm4_stat stat = {0}; struct dxbc_writer dxbc; size_t i; int ret; @@ -6127,8 +6502,9 @@ int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun write_sm4_signature(ctx, &dxbc, false); write_sm4_signature(ctx, &dxbc, true); write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); + write_sm4_shdr(ctx, entry_func, &stat, &dxbc); write_sm4_sfi0(ctx, &dxbc); + write_sm4_stat(ctx, &stat, &dxbc); if (!(ret = ctx->result)) ret = dxbc_writer_write(&dxbc, out); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 306c1ca0dd8..fc217860403 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -443,20 +445,47 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char bytecode_set_bytes(buffer, offset, string, length); } -static void vkd3d_shader_dump_blob(const char *path, const char *profile, - const char *suffix, const void *data, size_t size) +struct shader_dump_data +{ + uint8_t checksum[16]; + const char *path; + const char *profile; + const char *source_suffix; + const char *target_suffix; +}; + +static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, + const void *data, size_t size, bool source) { - static unsigned int shader_id = 0; + static const char hexadecimal_digits[] = "0123456789abcdef"; + const uint8_t *checksum = dump_data->checksum; + char str_checksum[33]; + unsigned int pos = 0; char filename[1024]; - unsigned int id; + unsigned int i; FILE *f; - id = vkd3d_atomic_increment_u32(&shader_id) - 1; + if (!dump_data->path) + return; + + for (i = 0; i < ARRAY_SIZE(dump_data->checksum); ++i) + { + str_checksum[2 * i] = hexadecimal_digits[checksum[i] >> 4]; + str_checksum[2 * i + 1] = hexadecimal_digits[checksum[i] & 0xf]; + } + str_checksum[32] = '\0'; - if (profile) - snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u-%s.%s", path, id, profile, suffix); + pos = snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s", dump_data->path, str_checksum); + + if (dump_data->profile) + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); + + if (source) + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix); else - snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u.%s", path, id, suffix); + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix); + + TRACE("Dumping shader to \"%s\".\n", filename); if ((f = fopen(filename, "wb"))) { if (fwrite(data, 1, size, f) != size) @@ -488,37 +517,61 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t } } -void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info) +static const char *shader_get_target_type_suffix(enum vkd3d_shader_target_type type) +{ + switch (type) + { + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + return "spv"; + case VKD3D_SHADER_TARGET_SPIRV_TEXT: + return "spv.s"; + case VKD3D_SHADER_TARGET_D3D_ASM: + return "d3d.s"; + case VKD3D_SHADER_TARGET_D3D_BYTECODE: + return "d3dbc"; + case VKD3D_SHADER_TARGET_DXBC_TPF: + return "dxbc"; + case VKD3D_SHADER_TARGET_GLSL: + return "glsl"; + case VKD3D_SHADER_TARGET_FX: + return "fx"; + case VKD3D_SHADER_TARGET_MSL: + return "msl"; + default: + FIXME("Unhandled target type %#x.\n", type); + return "bin"; + } +} + +static void fill_shader_dump_data(const struct vkd3d_shader_compile_info *compile_info, + struct shader_dump_data *data) { - const struct vkd3d_shader_code *shader = &compile_info->source; - const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - const struct hlsl_profile_info *profile; - const char *profile_name = NULL; static bool enabled = true; - const char *path; + + data->path = NULL; if (!enabled) return; - if (!(path = getenv("VKD3D_SHADER_DUMP_PATH"))) + if (!(data->path = getenv("VKD3D_SHADER_DUMP_PATH"))) { enabled = false; return; } + data->profile = NULL; if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { - if (!(hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) - return; + const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - if (!(profile = hlsl_get_target_info(hlsl_source_info->profile))) - return; - - profile_name = profile->name; + if ((hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) + data->profile = hlsl_source_info->profile; } - vkd3d_shader_dump_blob(path, profile_name, shader_get_source_type_suffix(compile_info->source_type), - shader->code, shader->size); + vkd3d_compute_md5(compile_info->source.code, compile_info->source.size, + (uint32_t *)data->checksum, VKD3D_MD5_STANDARD); + data->source_suffix = shader_get_source_type_suffix(compile_info->source_type); + data->target_suffix = shader_get_target_type_suffix(compile_info->target_type); } static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) @@ -1497,6 +1550,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_dump_data dump_data; int ret; TRACE("compile_info %p, messages %p.\n", compile_info, messages); @@ -1511,7 +1565,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - vkd3d_shader_dump_shader(compile_info); + fill_shader_dump_data(compile_info, &dump_data); + vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { @@ -1580,7 +1635,8 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, case VKD3D_SHADER_TARGET_GLSL: if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) return ret; - ret = glsl_compile(program, config_flags, compile_info, out, message_context); + ret = glsl_compile(program, config_flags, &scan_descriptor_info, + compile_info, out, message_context); vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; @@ -1593,6 +1649,10 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; + case VKD3D_SHADER_TARGET_MSL: + ret = msl_compile(program, config_flags, compile_info, message_context); + break; + default: /* Validation should prevent us from reaching this. */ vkd3d_unreachable(); @@ -1620,6 +1680,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_dump_data dump_data; int ret; TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); @@ -1634,7 +1695,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - vkd3d_shader_dump_shader(compile_info); + fill_shader_dump_data(compile_info, &dump_data); + vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { @@ -1676,6 +1738,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, } } + vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); + vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY; @@ -1888,6 +1952,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, #ifdef VKD3D_SHADER_UNSUPPORTED_GLSL VKD3D_SHADER_TARGET_GLSL, +#endif +#ifdef VKD3D_SHADER_UNSUPPORTED_MSL + VKD3D_SHADER_TARGET_MSL, #endif }; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index ef66a8ca07a..447210449da 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -80,6 +80,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DCL = 1010, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, @@ -152,6 +153,12 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, + VKD3D_SHADER_ERROR_HLSL_INVALID_PROFILE = 5033, + VKD3D_SHADER_ERROR_HLSL_MISPLACED_COMPILE = 5034, + VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN = 5035, + VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT = 5036, + VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037, + VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -159,8 +166,10 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + VKD3D_SHADER_WARNING_HLSL_IGNORED_DEFAULT_VALUE = 5306, VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND = 6001, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF = 7000, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN = 7001, @@ -169,6 +178,11 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE = 7007, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT = 7008, + VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED = 7009, + VKD3D_SHADER_ERROR_D3DBC_INVALID_PROFILE = 7010, + VKD3D_SHADER_ERROR_D3DBC_INVALID_WRITEMASK = 7011, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, @@ -227,6 +241,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + + VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000, }; enum vkd3d_shader_opcode @@ -1344,8 +1360,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins struct vkd3d_shader_immediate_constant_buffer *icb); bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, unsigned int dst, unsigned int src); -struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions); void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); enum vkd3d_shader_config_flags @@ -1353,6 +1367,12 @@ enum vkd3d_shader_config_flags VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, }; +enum vsir_control_flow_type +{ + VSIR_CF_STRUCTURED, + VSIR_CF_BLOCKS, +}; + struct vsir_program { struct vkd3d_shader_version shader_version; @@ -1372,6 +1392,7 @@ struct vsir_program unsigned int temp_count; unsigned int ssa_count; bool use_vocp; + enum vsir_control_flow_type cf_type; const char **block_names; size_t block_name_count; @@ -1384,11 +1405,16 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( const struct vsir_program *program, enum vkd3d_shader_parameter_name name); bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve); -enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type); +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, const char *source_name, struct vkd3d_shader_message_context *message_context); +struct vkd3d_shader_src_param *vsir_program_create_outpointid_param( + struct vsir_program *program); +bool vsir_instruction_init_with_params(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count); static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) @@ -1549,7 +1575,6 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, enum vkd3d_shader_error error, const char *format, va_list args); -void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); uint64_t vkd3d_shader_init_config_flags(void); void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); #define vkd3d_shader_trace_text(text, size) \ @@ -1570,8 +1595,9 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); int glsl_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); #define SPIRV_MAX_SRC_COUNT 6 @@ -1580,7 +1606,16 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); +int msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + +enum vkd3d_md5_variant +{ + VKD3D_MD5_STANDARD, + VKD3D_MD5_DXBC, +}; + +void vkd3d_compute_md5(const void *dxbc, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant); int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); @@ -1853,7 +1888,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) -#define DXBC_MAX_SECTION_COUNT 5 +#define DXBC_MAX_SECTION_COUNT 6 struct dxbc_writer { diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index dcc7690876f..eab0436bebd 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -19,6 +19,7 @@ */ #include "vkd3d_private.h" +#include static void d3d12_fence_incref(struct d3d12_fence *fence); static void d3d12_fence_decref(struct d3d12_fence *fence); @@ -2451,6 +2452,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL } list->is_recording = false; + list->has_depth_bounds = false; if (!list->is_valid) { @@ -2479,7 +2481,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, list->fb_layer_count = 0; list->xfb_enabled = false; - + list->has_depth_bounds = false; list->is_predicated = false; list->current_framebuffer = VK_NULL_HANDLE; @@ -3078,7 +3080,7 @@ done: vkd3d_free(vk_descriptor_writes); } -static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, +static void d3d12_command_list_update_virtual_descriptors(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; @@ -3210,6 +3212,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) { + if (!list->device->use_vk_heaps) + return; + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) { if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) @@ -3296,6 +3301,15 @@ static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); } +static void d3d12_command_list_update_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + if (list->device->use_vk_heaps) + d3d12_command_list_update_heap_descriptors(list, bind_point); + else + d3d12_command_list_update_virtual_descriptors(list, bind_point); +} + static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); @@ -3303,7 +3317,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false; - list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); return true; } @@ -3320,7 +3334,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false; - list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); if (list->current_render_pass != VK_NULL_HANDLE) return true; @@ -3351,6 +3365,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list list->xfb_enabled = true; } + if (graphics->ds_desc.depthBoundsTestEnable && !list->has_depth_bounds) + { + list->has_depth_bounds = true; + VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, 0.0f, 1.0f)); + } + return true; } @@ -5939,7 +5959,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, FLOAT min, FLOAT max) { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + + TRACE("iface %p, min %.8e, max %.8e.\n", iface, min, max); + + if (isnan(max)) + max = 0.0f; + if (isnan(min)) + min = 0.0f; + + if (!list->device->vk_info.EXT_depth_range_unrestricted && (min < 0.0f || min > 1.0f || max < 0.0f || max > 1.0f)) + { + WARN("VK_EXT_depth_range_unrestricted was not found, clamping depth bounds to 0.0 and 1.0.\n"); + max = vkd3d_clamp(max, 0.0f, 1.0f); + min = vkd3d_clamp(min, 0.0f, 1.0f); + } + + list->has_depth_bounds = true; + VK_CALL(vkCmdSetDepthBounds(list->vk_command_buffer, min, max)); } static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, @@ -6189,8 +6227,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->allocator = allocator; - list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors - : d3d12_command_list_update_descriptors; list->descriptor_heap_count = 0; if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 01841c89692..65339c7ba5d 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -102,6 +102,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_RANGE_UNRESTRICTED, EXT_depth_range_unrestricted), VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 682d488faa8..ea7d8f040b5 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -738,7 +738,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns if (set_count > max_count) { /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ - ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); + WARN("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); return false; } @@ -3867,6 +3867,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, }; static const VkPipelineDynamicStateCreateInfo dynamic_desc = { diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index 831dc07af56..839bb173854 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -703,7 +703,7 @@ const char *debug_vk_extent_3d(VkExtent3D extent) const char *debug_vk_queue_flags(VkQueueFlags flags) { - char buffer[159]; + char buffer[191]; buffer[0] = '\0'; #define FLAG_TO_STR(f) if (flags & f) { strcat(buffer, " | "#f); flags &= ~f; } @@ -715,6 +715,7 @@ const char *debug_vk_queue_flags(VkQueueFlags flags) #undef FLAG_TO_STR #define FLAG_TO_STR(f, n) if (flags & f) { strcat(buffer, " | "#n); flags &= ~f; } FLAG_TO_STR(0x20, VK_QUEUE_VIDEO_DECODE_BIT_KHR) + FLAG_TO_STR(0x40, VK_QUEUE_VIDEO_ENCODE_BIT_KHR) #undef FLAG_TO_STR if (flags) FIXME("Unrecognized flag(s) %#x.\n", flags); diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index ba4e2e8488d..e6d477a5c12 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -131,6 +131,7 @@ struct vkd3d_vulkan_info bool EXT_calibrated_timestamps; bool EXT_conditional_rendering; bool EXT_debug_marker; + bool EXT_depth_range_unrestricted; bool EXT_depth_clip_enable; bool EXT_descriptor_indexing; bool EXT_fragment_shader_interlock; @@ -1254,7 +1255,7 @@ struct d3d12_command_list VkFormat dsv_format; bool xfb_enabled; - + bool has_depth_bounds; bool is_predicated; VkFramebuffer current_framebuffer; @@ -1271,7 +1272,6 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; - void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); struct d3d12_descriptor_heap *descriptor_heaps[64]; unsigned int descriptor_heap_count; -- 2.45.2