From caa0d7f0cafae8a67fb79dd7426839126285a092 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 29 Nov 2024 07:14:57 +1100 Subject: [PATCH] Updated vkd3d to 9619582d1b6a54720e17a148a72b446fda2fd41f. --- libs/vkd3d/include/private/vkd3d_common.h | 2 +- libs/vkd3d/include/vkd3d_shader.h | 25 + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 + libs/vkd3d/libs/vkd3d-shader/dxil.c | 29 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 151 ++- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 69 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 38 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 26 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1164 ++++++++++++++--- libs/vkd3d/libs/vkd3d-shader/ir.c | 519 +++++++- libs/vkd3d/libs/vkd3d-shader/msl.c | 237 +++- libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + libs/vkd3d/libs/vkd3d-shader/spirv.c | 80 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 1090 +-------------- .../libs/vkd3d-shader/vkd3d_shader_main.c | 20 + .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 18 files changed, 2038 insertions(+), 1420 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index ec1dd70c9b2..fd62730f948 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index f95caa2f825..cb561d7f079 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -112,6 +112,11 @@ enum vkd3d_shader_structure_type * \since 1.13 */ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, + /** + * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info 
structure. + * \since 1.15 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -2040,6 +2045,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info unsigned int combined_sampler_count; }; +/** + * A chained structure describing the tessellation information in a hull shader. + * + * This structure extends vkd3d_shader_compile_info. + * + * \since 1.15 + */ +struct vkd3d_shader_scan_hull_shader_tessellation_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The tessellation output primitive. */ + enum vkd3d_shader_tessellator_output_primitive output_primitive; + /** The tessellation partitioning mode. */ + enum vkd3d_shader_tessellator_partitioning partitioning; +}; + /** * Data type of a shader varying, returned as part of struct * vkd3d_shader_signature_element. 
diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 9e2eacbcfa6..bda9bc72f56 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1565,6 +1565,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } @@ -1671,6 +1672,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 7099bcc9ce2..71f3c7f17b0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3824,7 +3824,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par } static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - enum vkd3d_shader_sysval_semantic sysval_semantic) + enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input) { switch (sysval_semantic) { @@ -3834,7 +3834,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( case VKD3D_SHADER_SV_SAMPLE_INDEX: return VKD3DSPR_NULL; case VKD3D_SHADER_SV_COVERAGE: - return VKD3DSPR_COVERAGE; + return is_input ? 
VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK; case VKD3D_SHADER_SV_DEPTH: return VKD3DSPR_DEPTHOUT; case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: @@ -3884,7 +3884,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade param = ¶ms[i]; if (e->register_index == UINT_MAX - && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) + && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL) { dst_param_io_init(param, e, io_reg_type); continue; @@ -9348,7 +9348,7 @@ static void signature_element_read_additional_element_values(struct signature_el } static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, - struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) + struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input) { unsigned int i, j, column_count, operand_count, index; const struct sm6_metadata_node *node, *element_node; @@ -9466,7 +9466,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if ((is_register = e->register_index == UINT_MAX)) { - if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) + if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID) { WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, @@ -9578,17 +9578,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons } if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], - &program->input_signature, tessellator_domain)) < 0) + &program->input_signature, tessellator_domain, true)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], - &program->output_signature, 
tessellator_domain)) < 0) + &program->output_signature, tessellator_domain, false)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], - &program->patch_constant_signature, tessellator_domain)) < 0) + &program->patch_constant_signature, tessellator_domain, false)) < 0) { return ret; } @@ -9717,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); ins->declaration.tessellator_domain = tessellator_domain; + sm6->p.program->tess_domain = tessellator_domain; } -static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, - const char *type) +static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, + unsigned int count, bool allow_zero, const char *type) { - if (!count || count > 32) + if ((!count && !allow_zero) || count > 32) { WARN("%s control point count %u invalid.\n", type, count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, @@ -9951,7 +9952,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa } sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); - sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); + sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); sm6->p.program->input_control_point_count = operands[1]; return operands[0]; @@ -10010,9 +10011,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa } } - sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); + sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); program->input_control_point_count = operands[1]; - sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); + sm6_parser_validate_control_point_count(sm6, operands[2], false, 
"Hull shader output"); sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); program->output_control_point_count = operands[2]; sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index e22177e1e30..064e15c4b60 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -36,6 +36,16 @@ struct fx_4_binary_type uint32_t typeinfo; }; +struct fx_5_shader +{ + uint32_t offset; + uint32_t sodecl[4]; + uint32_t sodecl_count; + uint32_t rast_stream; + uint32_t iface_bindings_count; + uint32_t iface_bindings; +}; + struct string_entry { struct rb_entry entry; @@ -550,6 +560,8 @@ enum fx_4_type_constants FX_4_ASSIGNMENT_VARIABLE = 0x2, FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, + FX_4_ASSIGNMENT_INLINE_SHADER = 0x7, + FX_5_ASSIGNMENT_INLINE_SHADER = 0x8, }; static const uint32_t fx_4_numeric_base_types[] = @@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable(); case HLSL_CLASS_VOID: @@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: /* This cannot appear as an extern variable. 
*/ break; } @@ -1834,6 +1848,7 @@ enum state_property_component_type FX_BLEND, FX_VERTEXSHADER, FX_PIXELSHADER, + FX_GEOMETRYSHADER, FX_COMPONENT_TYPE_COUNT, }; @@ -2065,6 +2080,7 @@ fx_4_states[] = { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, + { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 }, { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, @@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en static int fx_2_parse(struct fx_parser *parser) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented."); return -1; } @@ -3120,7 +3136,7 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) else { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Only numeric and string types are supported in annotations.\n"); + "Only numeric and string types are supported in annotations."); } if (type.element_count) @@ -3210,27 +3226,13 @@ static void fx_parse_buffers(struct fx_parser *parser) } } -static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) { struct vkd3d_shader_compile_info info = { 0 }; struct vkd3d_shader_code output; - uint32_t data_size, offset; const void *data = NULL; const char *p, *q, *end; - struct fx_5_shader - { - uint32_t offset; - uint32_t sodecl[4]; - uint32_t sodecl_count; - uint32_t rast_stream; - uint32_t iface_bindings_count; - 
uint32_t iface_bindings; - } shader5; - struct fx_4_gs_so - { - uint32_t offset; - uint32_t sodecl; - } gs_so; + uint32_t data_size; int ret; static const struct vkd3d_shader_compile_option options[] = @@ -3238,35 +3240,9 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, }; - switch (object_type) - { - case FX_4_OBJECT_TYPE_PIXEL_SHADER: - case FX_4_OBJECT_TYPE_VERTEX_SHADER: - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: - offset = fx_parser_read_u32(parser); - break; - - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: - fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); - offset = gs_so.offset; - break; - - case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: - case FX_5_OBJECT_TYPE_COMPUTE_SHADER: - case FX_5_OBJECT_TYPE_HULL_SHADER: - case FX_5_OBJECT_TYPE_DOMAIN_SHADER: - fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); - offset = shader5.offset; - break; - - default: - parser->failed = true; - return; - } - - fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); + fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); if (data_size) - data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); + data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size); if (!data) return; @@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Failed to disassemble shader blob.\n"); + "Failed to disassemble shader blob."); return; } parse_fx_print_indent(parser); @@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int parse_fx_print_indent(parser); vkd3d_string_buffer_printf(&parser->buffer, "}"); - if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) + if (object_type == 
FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) { vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", - fx_4_get_string(parser, gs_so.sodecl)); + fx_4_get_string(parser, shader->sodecl[0])); } else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) { - for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) + for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i) { - if (shader5.sodecl[i]) + if (shader->sodecl[i]) vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", - i, fx_4_get_string(parser, shader5.sodecl[i])); + i, fx_4_get_string(parser, shader->sodecl[i])); } - if (shader5.sodecl_count) - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); + if (shader->sodecl_count) + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); } vkd3d_shader_free_shader_code(&output); } +static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +{ + struct fx_5_shader shader = { 0 }; + + switch (object_type) + { + case FX_4_OBJECT_TYPE_PIXEL_SHADER: + case FX_4_OBJECT_TYPE_VERTEX_SHADER: + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: + shader.offset = fx_parser_read_u32(parser); + break; + + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: + shader.offset = fx_parser_read_u32(parser); + shader.sodecl[0] = fx_parser_read_u32(parser); + break; + + case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: + case FX_5_OBJECT_TYPE_COMPUTE_SHADER: + case FX_5_OBJECT_TYPE_HULL_SHADER: + case FX_5_OBJECT_TYPE_DOMAIN_SHADER: + fx_parser_read_u32s(parser, &shader, sizeof(shader)); + break; + + default: + parser->failed = true; + return; + } + + fx_4_parse_shader_blob(parser, object_type, &shader); +} + static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) { switch (type->typeinfo) @@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, 
uint32 [FX_UINT8] = "byte", }; const struct rhs_named_value *named_value; + struct fx_5_shader shader = { 0 }; + unsigned int shader_type = 0; uint32_t i, j, comp_count; struct fx_4_state *state; @@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), sizeof(*fx_4_states), fx_4_state_id_compare))) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id); break; } @@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), fx_4_get_string(parser, index.index)); break; + case FX_4_ASSIGNMENT_INLINE_SHADER: + case FX_5_ASSIGNMENT_INLINE_SHADER: + { + bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER; + + if (shader5) + fx_parser_read_unstructured(parser, &shader, entry.value, sizeof(shader)); + else + fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t)); + + if (state->type == FX_PIXELSHADER) + shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER; + else if (state->type == FX_VERTEXSHADER) + shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER; + else if (state->type == FX_GEOMETRYSHADER) + shader_type = shader5 ? 
FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO; + else if (state->type == FX_HULLSHADER) + shader_type = FX_5_OBJECT_TYPE_HULL_SHADER; + else if (state->type == FX_DOMAINSHADER) + shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER; + else if (state->type == FX_COMPUTESHADER) + shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER; + + vkd3d_string_buffer_printf(&parser->buffer, "\n"); + parse_fx_start_indent(parser); + fx_4_parse_shader_blob(parser, shader_type, &shader); + parse_fx_end_indent(parser); + break; + } default: fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, - "Unsupported assignment type %u.\n", entry.type); + "Unsupported assignment type %u.", entry.type); } vkd3d_string_buffer_printf(&parser->buffer, ";\n"); } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 96de18dc886..97c6c0a1377 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -287,6 +287,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) case HLSL_CLASS_UAV: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_VOID: case HLSL_CLASS_NULL: return false; @@ -434,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } @@ -525,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_STREAM_OUTPUT: break; } vkd3d_unreachable(); @@ -680,6 +683,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty case HLSL_CLASS_SCALAR: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable(); } type = next_type; @@ -898,6 +902,22 @@ struct hlsl_type 
*hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba return type; } +struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, + enum hlsl_so_object_type so_type, struct hlsl_type *data_type) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_STREAM_OUTPUT; + type->e.so.so_type = so_type; + type->e.so.type = data_type; + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count) { @@ -1086,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_STREAM_OUTPUT: break; } @@ -1157,6 +1178,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_CONSTANT_BUFFER: return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + case HLSL_CLASS_STREAM_OUTPUT: + if (t1->e.so.so_type != t2->e.so.so_type) + return false; + return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -1695,22 +1721,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * return &s->node; } -struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, - struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_vsir_instruction_ref *vsir_instr; - - if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) - return NULL; - init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); - vsir_instr->vsir_instr_idx = vsir_instr_idx; - - if (reg) - vsir_instr->node.reg = *reg; - - return &vsir_instr->node; -} - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx 
*ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { @@ -2533,9 +2543,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_unreachable(); } vkd3d_unreachable(); @@ -2836,6 +2843,20 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru vkd3d_string_buffer_printf(string, ""); return string; + case HLSL_CLASS_STREAM_OUTPUT: + if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) + vkd3d_string_buffer_printf(string, "PointStream"); + else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) + vkd3d_string_buffer_printf(string, "LineStream"); + else + vkd3d_string_buffer_printf(string, "TriangleStream"); + if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } + return string; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -2968,7 +2989,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", - [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", }; if (type >= ARRAY_SIZE(names)) @@ -3562,11 +3582,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", - hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); - break; } } @@ -3875,10 +3890,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) 
case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); - break; } } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 075c76cb0e2..25d1b8df947 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ #include "vkd3d_shader_private.h" #include "wine/rbtree.h" -#include "d3dcommon.h" #include "d3dx9shader.h" /* The general IR structure is inspired by Mesa GLSL hir, even though the code @@ -105,6 +104,7 @@ enum hlsl_type_class HLSL_CLASS_GEOMETRY_SHADER, HLSL_CLASS_CONSTANT_BUFFER, HLSL_CLASS_BLEND_STATE, + HLSL_CLASS_STREAM_OUTPUT, HLSL_CLASS_VOID, HLSL_CLASS_NULL, HLSL_CLASS_ERROR, @@ -142,6 +142,13 @@ enum hlsl_sampler_dim /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ }; +enum hlsl_so_object_type +{ + HLSL_STREAM_OUTPUT_POINT_STREAM, + HLSL_STREAM_OUTPUT_LINE_STREAM, + HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, +}; + enum hlsl_regset { HLSL_REGSET_SAMPLERS, @@ -220,6 +227,12 @@ struct hlsl_type } resource; /* Additional field to distinguish object types. Currently used only for technique types. */ unsigned int version; + /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */ + struct + { + struct hlsl_type *type; + enum hlsl_so_object_type so_type; + } so; } e; /* Number of numeric register components used by one value of this type, for each regset. @@ -330,8 +343,6 @@ enum hlsl_ir_node_type HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, - - HLSL_IR_VSIR_INSTRUCTION_REF, }; /* Common data for every type of IR instruction node. */ @@ -934,16 +945,6 @@ struct hlsl_ir_stateblock_constant char *name; }; -/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. - * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. 
*/ -struct hlsl_ir_vsir_instruction_ref -{ - struct hlsl_ir_node node; - - /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ - unsigned int vsir_instr_idx; -}; - struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. */ @@ -1259,12 +1260,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); } -static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) -{ - VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); - return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); -} - static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -1519,6 +1514,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, + enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); @@ -1588,9 +1585,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, - struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); - void hlsl_error(struct hlsl_ctx *ctx, const struct 
vkd3d_shader_location *loc, enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 8dace11916a..31fb30521e9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -104,6 +104,7 @@ if {return KW_IF; } in {return KW_IN; } inline {return KW_INLINE; } inout {return KW_INOUT; } +LineStream {return KW_LINESTREAM; } linear {return KW_LINEAR; } matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } @@ -114,6 +115,7 @@ out {return KW_OUT; } packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } +PointStream {return KW_POINTSTREAM; } pixelshader {return KW_PIXELSHADER; } RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } @@ -170,6 +172,7 @@ texture3D {return KW_TEXTURE3D; } TextureCube {return KW_TEXTURECUBE; } textureCUBE {return KW_TEXTURECUBE; } TextureCubeArray {return KW_TEXTURECUBEARRAY; } +TriangleStream {return KW_TRIANGLESTREAM; } true {return KW_TRUE; } typedef {return KW_TYPEDEF; } unsigned {return KW_UNSIGNED; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 60aade732db..5bcd5e9034b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -678,8 +678,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_unreachable(); } } @@ -6553,6 +6551,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_semantic semantic; enum hlsl_buffer_type buffer_type; enum hlsl_sampler_dim sampler_dim; + enum 
hlsl_so_object_type so_type; struct hlsl_attribute *attr; struct parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; @@ -6596,6 +6595,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_INLINE %token KW_INOUT %token KW_LINEAR +%token KW_LINESTREAM %token KW_MATRIX %token KW_NAMESPACE %token KW_NOINTERPOLATION @@ -6605,6 +6605,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER +%token KW_POINTSTREAM %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D @@ -6654,6 +6655,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_TEXTURE3D %token KW_TEXTURECUBE %token KW_TEXTURECUBEARRAY +%token KW_TRIANGLESTREAM %token KW_TRUE %token KW_TYPEDEF %token KW_UNSIGNED @@ -6784,6 +6786,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type semantic +%type so_type + %type state_block %type state_block_index_opt @@ -7805,6 +7809,20 @@ rov_type: $$ = HLSL_SAMPLER_DIM_3D; } +so_type: + KW_POINTSTREAM + { + $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; + } + | KW_LINESTREAM + { + $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; + } + | KW_TRIANGLESTREAM + { + $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; + } + resource_format: var_modifiers type { @@ -7948,6 +7966,10 @@ type_no_void: validate_uav_type(ctx, $1, $3, &@4); $$ = hlsl_new_uav_type(ctx, $1, $3, true); } + | so_type '<' type '>' + { + $$ = hlsl_new_stream_output_type(ctx, $1, $3); + } | KW_RWBYTEADDRESSBUFFER { $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index d11ff481f6b..8f45628dbee 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -19,6 
+19,7 @@ */ #include "hlsl.h" +#include "vkd3d_shader_private.h" #include #include @@ -1678,6 +1679,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; @@ -4162,9 +4164,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); - case HLSL_IR_VSIR_INSTRUCTION_REF: - /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ - vkd3d_unreachable(); } return false; @@ -4304,9 +4303,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); - case HLSL_IR_VSIR_INSTRUCTION_REF: - /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ - vkd3d_unreachable(); case HLSL_IR_STORE: { @@ -4494,6 +4490,9 @@ struct register_allocator /* Two allocations with different mode can't share the same register. */ int mode; + /* If an allocation is VIP, no new allocations can be made in the + * register unless they are VIP as well. 
*/ + bool vip; } *allocations; size_t count, capacity; @@ -4513,7 +4512,7 @@ struct register_allocator }; static unsigned int get_available_writemask(const struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; @@ -4532,6 +4531,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all writemask &= ~allocation->writemask; if (allocation->mode != mode) writemask = 0; + if (allocation->vip && !vip) + writemask = 0; } if (!writemask) @@ -4542,7 +4543,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all } static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, - unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) + unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) { struct allocation *allocation; @@ -4556,16 +4557,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation->first_write = first_write; allocation->last_read = last_read; allocation->mode = mode; + allocation->vip = vip; allocator->reg_count = max(allocator->reg_count, reg_idx + 1); } -/* reg_size is the number of register components to be reserved, while component_count is the number - * of components for the register's writemask. In SM1, floats and vectors allocate the whole - * register, even if they don't use it completely. */ +/* Allocates a register (or some components of it) within the register allocator. + * 'reg_size' is the number of register components to be reserved. + * 'component_count' is the number of components for the hlsl_reg's + * writemask, which can be smaller than 'reg_size'. 
For instance, sm1 + * floats and vectors allocate the whole register even if they are not + * using all components. + * 'mode' can be provided to avoid allocating on a register that already has an + * allocation with a different mode. + * 'force_align' can be used so that the allocation always start in '.x'. + * 'vip' can be used so that no new allocations can be made in the given register + * unless they are 'vip' as well. */ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size, - unsigned int component_count, int mode, bool force_align) + unsigned int component_count, int mode, bool force_align, bool vip) { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 4 : reg_size; @@ -4579,7 +4589,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { unsigned int available_writemask = get_available_writemask(allocator, - first_write, last_read, reg_idx, mode); + first_write, last_read, reg_idx, mode, vip); if (vkd3d_popcount(available_writemask) >= pref) { @@ -4589,7 +4599,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); - record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); + + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); return ret; } } @@ -4598,13 +4609,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, - vkd3d_write_mask_from_component_count(reg_size), first_write, 
last_read, mode); + vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); return ret; } /* Allocate a register with writemask, while reserving reg_writemask. */ -static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) +static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, + struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, + uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -4614,11 +4626,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct for (reg_idx = 0;; ++reg_idx) { if ((get_available_writemask(allocator, first_write, last_read, - reg_idx, mode) & reg_writemask) == reg_writemask) + reg_idx, mode, vip) & reg_writemask) == reg_writemask) break; } - record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); + record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); ret.id = reg_idx; ret.allocation_size = 1; @@ -4628,7 +4640,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct } static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, - unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) + unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) { unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; unsigned int writemask; @@ -4636,18 +4648,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig for (i = 0; i < (reg_size / 4); ++i) { - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); + writemask = get_available_writemask(allocator, first_write, 
last_read, reg_idx + i, mode, vip); if (writemask != VKD3DSP_WRITEMASK_ALL) return false; } - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); if ((writemask & last_reg_mask) != last_reg_mask) return false; return true; } static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) + unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -4655,15 +4667,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) + if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) break; } for (i = 0; i < reg_size / 4; ++i) - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); if (reg_size % 4) record_allocation(ctx, allocator, reg_idx + (reg_size / 4), - (1u << (reg_size % 4)) - 1, first_write, last_read, mode); + (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; @@ -4679,9 +4691,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); + return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); else - return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); + return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) @@ -4859,8 +4871,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, } if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, - instr->index, instr->last_read, reg_writemask, dst_writemask, 0); + instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, + instr->last_read, reg_writemask, dst_writemask, 0, false); else instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); @@ -5181,14 +5193,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (i < bind_count) { - if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) + if (get_available_writemask(&allocator_used, 1, UINT_MAX, + reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Overlapping register() reservations on 'c%u'.", reg_idx + i); } - record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); } var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; @@ -5211,7 +5224,7 @@ static void allocate_const_registers(struct hlsl_ctx 
*ctx, struct hlsl_ir_functi if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); + var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } @@ -5254,7 +5267,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { - record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, + var->first_write, var->last_read, 0, false); break; } } @@ -5311,6 +5325,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_register_type type; struct vkd3d_shader_version version; + bool special_interpolation = false; + bool vip_allocation = false; uint32_t reg; bool builtin; @@ -5363,6 +5379,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var * domains, it is allocated as if it was 'float[1]'. */ var->force_align = true; } + + if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX + || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX + || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) + vip_allocation = true; + + if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) + special_interpolation = true; } if (builtin) @@ -5376,8 +5400,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); unsigned int reg_size = optimize ? 
var->data_type->dimx : 4; - var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, - UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); + if (special_interpolation) + mode = VKD3DSIM_NONE; + + var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, + reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); @@ -6812,7 +6839,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src } static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, - struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) { struct hlsl_ir_constant *constant; @@ -6832,6 +6859,242 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } } +static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) +{ + const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref; + + reg->type = var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; + + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); + reg->idx[0].offset += offset_const_deref / 4; + reg->idx_count = 1; + } + else + { + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; + + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; + + if (!(idx_src = vsir_program_get_src_params(program, 1))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return false; + } + memset(idx_src, 0, sizeof(*idx_src)); + reg->idx[1].rel_addr = idx_src; + + vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); + } + } + + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + return true; +} + +static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) + { + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); + + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; + reg->idx[0].offset += 
hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ + reg->idx[2].offset = offset / 4; + reg->idx_count = 3; + } + else + { + reg->idx[0].offset = var->buffer->reg.index; + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { + 
bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); + + if (version->type == VKD3D_SHADER_TYPE_DOMAIN) + reg->type = VKD3DSPR_PATCHCONST; + else + reg->type = VKD3DSPR_INPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else if (var->is_output_semantic) + { + bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); + reg->type = VKD3DSPR_OUTPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else + { + return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); + } + return true; +} + +static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, + unsigned int 
dst_writemask, const struct vkd3d_shader_location *loc) +{ + uint32_t writemask; + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) + return false; + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + return true; +} + +static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc, unsigned int writemask) +{ + uint32_t reg_writemask; + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref)) + return false; + dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); + return true; +} + static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) { @@ -7059,13 +7322,10 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_INT: case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; - case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); - break; - case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); break; @@ -7659,40 +7919,6 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); } -static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) -{ - struct vkd3d_shader_location *loc; - struct hlsl_ir_node *vsir_instr; - - loc = &program->instructions.elements[program->instructions.count - 1].location; - - if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - 
hlsl_block_add_instr(block, vsir_instr); -} - -static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_node *instr) -{ - struct vkd3d_shader_location *loc; - struct hlsl_ir_node *vsir_instr; - - loc = &program->instructions.elements[program->instructions.count - 1].location; - - if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, - program->instructions.count - 1, instr->data_type, &instr->reg, loc))) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - - list_add_before(&instr->entry, &vsir_instr->entry); - hlsl_replace_node(instr, vsir_instr); -} - static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, const struct vkd3d_shader_location *loc) @@ -7806,8 +8032,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - - add_last_vsir_instr_to_block(ctx, program, block); } static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -7819,8 +8043,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ return; ins->declaration.count = temp_count; - - add_last_vsir_instr_to_block(ctx, program, block); } static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, @@ -7838,8 +8060,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; ins->declaration.indexable_temp.component_count = comp_count; ins->declaration.indexable_temp.has_function_scope = false; - - add_last_vsir_instr_to_block(ctx, program, block); } static bool type_is_float(const struct hlsl_type *type) @@ -8505,99 +8725,730 @@ static bool 
sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } } -static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_store *store) { - struct vkd3d_string_buffer *dst_type_string; - struct hlsl_ir_node *instr, *next; - struct hlsl_ir_switch_case *c; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; - LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } - } + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return false; - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); + dst_param = &ins->dst[0]; + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + dst_param, &store->lhs, &instr->loc, store->writemask)) + return false; - case HLSL_IR_CONSTANT: - /* In SM4 all constants are inlined. */ - break; + src_param = &ins->src[0]; + vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); - case HLSL_IR_EXPR: - if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) - break; + return true; +} - if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) - replace_instr_with_last_vsir_instr(ctx, program, instr); +/* Does this variable's data come directly from the API user, rather than + * being temporary or from a previous shader stage? I.e. is it a uniform or + * VS input? 
*/ +static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) +{ + if (var->is_uniform) + return true; - hlsl_release_string_buffer(ctx, dst_type_string); - break; + return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; +} - case HLSL_IR_IF: - sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); - sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); - break; +static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *type = load->node.data_type; + struct vkd3d_shader_dst_param *dst_param; + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + struct hlsl_constant_value value; - case HLSL_IR_LOOP: - sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); - break; + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) + { + /* Uniform bools can be specified as anything, but internal bools + * always have 0 for false and ~0 for true. Normalise that here. 
*/ - case HLSL_IR_SWITCH: - LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) - sm4_generate_vsir_block(ctx, &c->body, program); - break; + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) + return false; - case HLSL_IR_SWIZZLE: - generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); - replace_instr_with_last_vsir_instr(ctx, program, instr); - break; + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); - default: - break; - } + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; + + memset(&value, 0xff, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, + VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); + memset(&value, 0x00, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, + VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); + } + else + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return false; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; } + return true; } -static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_resource_store *store) { - bool is_patch_constant_func = func == ctx->patch_constant_func; - struct hlsl_block block = {0}; - struct hlsl_scope *scope; - struct hlsl_ir_var *var; - uint32_t temp_count; - - compute_liveness(ctx, func); - mark_indexable_vars(ctx, func); - temp_count = allocate_temp_registers(ctx, func); - 
if (ctx->result) - return; - program->temp_count = max(program->temp_count, temp_count); + struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); + struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_instruction *ins; + unsigned int writemask; - hlsl_block_init(&block); + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return false; + } - LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) { - if ((var->is_input_semantic && var->last_read) - || (var->is_output_semantic && var->first_write)) - sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); + return false; } - if (temp_count) - sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) + return false; - LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, writemask)) + return false; + } + else { - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) - { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) + return false; + + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) + return false; + } + + vsir_src_from_hlsl_node(&ins->src[0], ctx, 
coords, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); + + return true; +} + +static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset) +{ + struct hlsl_ir_constant *offset; + + VKD3D_ASSERT(texel_offset); + if (texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) + return false; + if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) + return false; + if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) + return false; + return true; +} + +static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi( + struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset) +{ + struct hlsl_ir_constant *offset; + + if (!texel_offset) + return; + offset = hlsl_ir_constant(texel_offset); + + ins->texel_offset.u = offset->value.u[0].i; + ins->texel_offset.v = 0; + ins->texel_offset.w = 0; + if (offset->node.data_type->dimx > 1) + ins->texel_offset.v = offset->value.u[1].i; + if (offset->node.data_type->dimx > 2) + ins->texel_offset.w = offset->value.u[2].i; +} + +static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); + bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &program->shader_version; + bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + const struct 
hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + enum hlsl_sampler_dim dim = load->sampling_dim; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + bool multisampled; + + VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); + + multisampled = resource_type->class == HLSL_CLASS_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + + if (uav) + opcode = VKD3DSIH_LD_UAV_TYPED; + else if (raw) + opcode = VKD3DSIH_LD_RAW; + else + opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) + return false; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return false; + } + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in + * the W component in the instruction. 
*/ + unsigned int dim_count = hlsl_sampler_dim_count(dim); + + if (dim_count == 1) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; + if (dim_count == 2) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; + } + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (multisampled) + { + if (sample_index->type == HLSL_IR_CONSTANT) + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, + &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0); + else if (version->major == 4 && version->minor == 0) + hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + else + vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL); + } + return true; +} + +static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + unsigned int src_count; + + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + opcode = VKD3DSIH_SAMPLE; + src_count = 3; + break; + + case HLSL_RESOURCE_SAMPLE_CMP: + opcode = VKD3DSIH_SAMPLE_C; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + opcode = VKD3DSIH_SAMPLE_C_LZ; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_LOD: + opcode = VKD3DSIH_SAMPLE_LOD; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + opcode = VKD3DSIH_SAMPLE_B; + src_count = 4; + break; + + case 
HLSL_RESOURCE_SAMPLE_GRAD: + opcode = VKD3DSIH_SAMPLE_GRAD; + src_count = 5; + break; + + default: + vkd3d_unreachable(); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return false; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return false; + } + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], + resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2], + sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + + if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); + } + else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL); + } + else if (opcode == VKD3DSIH_SAMPLE_GRAD) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL); + } + return true; +} + +static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, + const struct hlsl_ir_resource_load *load, uint32_t swizzle) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + 
const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + + opcode = VKD3DSIH_GATHER4; + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + if (!vkd3d_shader_ver_ge(version, 5, 0)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return false; + } + opcode = VKD3DSIH_GATHER4_PO; + } + + if (opcode == VKD3DSIH_GATHER4) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) + return false; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[2].swizzle = swizzle; + } + else if (opcode == VKD3DSIH_GATHER4_PO) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) + return false; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + ins->src[3].reg.dimension = 
VSIR_DIMENSION_VEC4; + ins->src[3].swizzle = swizzle; + } + else + { + vkd3d_unreachable(); + } + return true; +} + +static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) + return false; + + if (type->e.numeric.type == HLSL_TYPE_UINT) + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + return true; +} + +static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; + + if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER + || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers."); + return false; + } + + VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2))) + return false; + + if (type->e.numeric.type == HLSL_TYPE_UINT) + ins->flags = VKD3DSI_RESINFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, 
VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + return true; +} + +static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + if (load->sampler.var && !load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return false; + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return false; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + return sm4_generate_vsir_instr_ld(ctx, program, load); + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + /* Combined sample expressions were lowered. 
*/ + VKD3D_ASSERT(load->sampler.var); + return sm4_generate_vsir_instr_sample(ctx, program, load); + + case HLSL_RESOURCE_GATHER_RED: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); + + case HLSL_RESOURCE_GATHER_GREEN: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); + + case HLSL_RESOURCE_GATHER_BLUE: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); + + case HLSL_RESOURCE_GATHER_ALPHA: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); + + case HLSL_RESOURCE_SAMPLE_INFO: + return sm4_generate_vsir_instr_sample_info(ctx, program, load); + + case HLSL_RESOURCE_RESINFO: + return sm4_generate_vsir_instr_resinfo(ctx, program, load); + + case HLSL_RESOURCE_SAMPLE_PROJ: + vkd3d_unreachable(); + + default: + return false; + } +} + +static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_jump *jump) +{ + const struct hlsl_ir_node *instr = &jump->node; + struct vkd3d_shader_instruction *ins; + + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0); + + case HLSL_IR_JUMP_CONTINUE: + return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0); + + case HLSL_IR_JUMP_DISCARD_NZ: + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1))) + return false; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL); + return true; + + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return false; + } +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, 
struct vsir_program *program); + +static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) +{ + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) + return; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL); + + sm4_generate_vsir_block(ctx, &iff->then_block, program); + + if (!list_empty(&iff->else_block.instrs)) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) + return; + sm4_generate_vsir_block(ctx, &iff->else_block, program); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) + return; +} + +static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_loop *loop) +{ + struct hlsl_ir_node *instr = &loop->node; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0))) + return; + + sm4_generate_vsir_block(ctx, &loop->body, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0))) + return; +} + +static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_switch *swi) +{ + const struct hlsl_ir_node *selector = swi->selector.node; + struct hlsl_ir_node *instr = &swi->node; + struct vkd3d_shader_instruction *ins; + struct hlsl_ir_switch_case *cas; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1))) + return; + vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL); + + LIST_FOR_EACH_ENTRY(cas, &swi->cases, 
struct hlsl_ir_switch_case, entry) + { + if (cas->is_default) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0))) + return; + } + else + { + struct hlsl_constant_value value = {.u[0].u = cas->value}; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1))) + return; + vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL); + } + + sm4_generate_vsir_block(ctx, &cas->body, program); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0))) + return; +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct vkd3d_string_buffer *dst_type_string; + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + /* In SM4 all constants are inlined. 
*/ + break; + + case HLSL_IR_EXPR: + if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) + break; + sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer); + hlsl_release_string_buffer(ctx, dst_type_string); + break; + + case HLSL_IR_IF: + sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); + break; + + case HLSL_IR_LOAD: + sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); + break; + + case HLSL_IR_LOOP: + sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: + sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_JUMP: + sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_STORE: + sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWITCH: + sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); + break; + + case HLSL_IR_SWIZZLE: + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + + default: + break; + } + } +} + +static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +{ + bool is_patch_constant_func = func == ctx->patch_constant_func; + struct hlsl_block block = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + uint32_t temp_count; + + compute_liveness(ctx, func); + mark_indexable_vars(ctx, func); + temp_count = allocate_temp_registers(ctx, func); + if (ctx->result) + return; + program->temp_count = max(program->temp_count, temp_count); + + hlsl_block_init(&block); + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) + || (var->is_output_semantic 
&& var->first_write)) + sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); + } + + if (temp_count) + sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) continue; if (!var->regs[HLSL_REGSET_NUMERIC].allocated) @@ -8618,6 +9469,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, hlsl_block_cleanup(&block); sm4_generate_vsir_block(ctx, &func->body, program); + + generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); } /* OBJECTIVE: Translate all the information from ctx and entry_func to the @@ -8649,9 +9502,16 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl program->thread_group_size.z = ctx->thread_count[2]; } + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); sm4_generate_vsir_add_function(ctx, func, config_flags, program); if (version.type == VKD3D_SHADER_TYPE_HULL) + { + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + } } static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index b0e89bededb..0c06db9ff15 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -201,6 +201,14 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 src->reg.u.immconst_u32[0] = value; } +static void 
vsir_src_param_init_io(struct vkd3d_shader_src_param *src, + enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count) +{ + vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = vsir_swizzle_from_writemask(e->mask); +} + void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) { vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); @@ -278,6 +286,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader param->shift = 0; } +static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type, + const struct signature_element *e, unsigned int idx_count) +{ + vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = e->mask; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); @@ -743,6 +759,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr case VKD3DSIH_DCL_GLOBAL_FLAGS: case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: case VKD3DSIH_DCL_THREAD_GROUP: case VKD3DSIH_DCL_UAV_TYPED: vkd3d_shader_instruction_make_nop(ins); @@ -1369,26 +1386,17 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param } } -static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, - enum vkd3d_shader_register_type reg_type, unsigned int idx_count) -{ - param->write_mask = e->mask; - param->modifiers = 0; - param->shift = 0; - vsir_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); -} - static enum vkd3d_result 
control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, const struct vkd3d_shader_location *location) { struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_dst_param *param; const struct signature_element *e; - unsigned int i, count; + unsigned int i, count, stride = 0; - for (i = 0, count = 1; i < s->element_count; ++i) - count += !!s->elements[i].used_mask; + for (i = 0; i < s->element_count; ++i) + stride += !!s->elements[i].used_mask; + count = 2 + 3 * stride; if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) return VKD3D_ERROR_OUT_OF_MEMORY; @@ -1399,31 +1407,75 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p ins = &normaliser->instructions.elements[dst]; vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); - ins->flags = 1; - ++ins; + + ins = &normaliser->instructions.elements[dst + 1 + 3 * stride]; + vsir_instruction_init(ins, location, VKD3DSIH_RET); + + ins = &normaliser->instructions.elements[dst + 1]; for (i = 0; i < s->element_count; ++i) { + struct vkd3d_shader_instruction *ins_in, *ins_out, *ins_mov; + struct vkd3d_shader_dst_param *param_in, *param_out; + e = &s->elements[i]; if (!e->used_mask) continue; + ins_in = ins; + ins_out = &ins[stride]; + ins_mov = &ins[2 * stride]; + if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) { - vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV); - param = &ins->declaration.register_semantic.reg; - ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT_SIV); + param_in = &ins_in->declaration.register_semantic.reg; + ins_in->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + + vsir_instruction_init(ins_out, location, 
VKD3DSIH_DCL_OUTPUT_SIV); + param_out = &ins_out->declaration.register_semantic.reg; + ins_out->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); } else { - vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT); - param = &ins->declaration.dst; + vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT); + param_in = &ins_in->declaration.dst; + + vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT); + param_out = &ins_out->declaration.dst; } - shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); - param->reg.idx[0].offset = input_control_point_count; - param->reg.idx[1].offset = e->register_index; - param->write_mask = e->mask; + vsir_dst_param_init_io(param_in, VKD3DSPR_INPUT, e, 2); + param_in->reg.idx[0].offset = input_control_point_count; + param_in->reg.idx[1].offset = e->register_index; + param_in->write_mask = e->mask; + + vsir_dst_param_init_io(param_out, VKD3DSPR_OUTPUT, e, 2); + param_out->reg.idx[0].offset = input_control_point_count; + param_out->reg.idx[1].offset = e->register_index; + param_out->write_mask = e->mask; + + vsir_instruction_init(ins_mov, location, VKD3DSIH_MOV); + ins_mov->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); + ins_mov->dst_count = 1; + ins_mov->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); + ins_mov->src_count = 1; + + if (!ins_mov->dst || ! 
ins_mov->src) + { + WARN("Failed to allocate dst/src param.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + vsir_dst_param_init_io(&ins_mov->dst[0], VKD3DSPR_OUTPUT, e, 2); + ins_mov->dst[0].reg.idx[0].offset = 0; + ins_mov->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + ins_mov->dst[0].reg.idx[1].offset = e->register_index; + + vsir_src_param_init_io(&ins_mov->src[0], VKD3DSPR_INPUT, e, 2); + ins_mov->src[0].reg.idx[0].offset = 0; + ins_mov->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + ins_mov->src[0].reg.idx[1].offset = e->register_index; ++ins; } @@ -2129,6 +2181,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par case VKD3DSPR_OUTCONTROLPOINT: reg->type = VKD3DSPR_OUTPUT; + if (io_normaliser_is_in_fork_or_join_phase(normaliser)) + normaliser->use_vocp = true; /* fall through */ case VKD3DSPR_OUTPUT: reg_idx = reg->idx[reg->idx_count - 1].offset; @@ -2179,9 +2233,6 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi { reg = &ins->declaration.dst.reg; - if (reg->type == VKD3DSPR_OUTCONTROLPOINT) - normaliser->use_vocp = true; - /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) @@ -6665,6 +6716,9 @@ struct validation_context enum vkd3d_shader_opcode *blocks; size_t depth; size_t blocks_capacity; + + unsigned int outer_tess_idxs[4]; + unsigned int inner_tess_idxs[2]; }; static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, @@ -7185,6 +7239,10 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; + case VKD3DSPR_PRIMID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_NULL: vsir_validate_register_without_indices(ctx, reg); break; @@ -7201,6 +7259,18 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_uav_register(ctx, reg); break; + case VKD3DSPR_OUTPOINTID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_FORKINSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_JOININSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_INCONTROLPOINT: vsir_validate_io_register(ctx, reg); break; @@ -7213,6 +7283,38 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_io_register(ctx, reg); break; + case VKD3DSPR_TESSCOORD: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_THREADID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_THREADGROUPID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_LOCALTHREADID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_LOCALTHREADINDEX: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_COVERAGE: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_SAMPLEMASK: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_GSINSTID: + 
vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_DEPTHOUTGE: vsir_validate_register_without_indices(ctx, reg); break; @@ -7221,10 +7323,22 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; + case VKD3DSPR_OUTSTENCILREF: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_SSA: vsir_validate_ssa_register(ctx, reg); break; + case VKD3DSPR_WAVELANECOUNT: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_WAVELANEINDEX: + vsir_validate_register_without_indices(ctx, reg); + break; + default: break; } @@ -7469,9 +7583,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, const struct shader_signature *signature, enum vsir_signature_type signature_type, unsigned int idx) { + enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; const char *signature_type_name = signature_type_names[signature_type]; const struct signature_element *element = &signature->elements[idx]; - bool integer_type = false; + bool integer_type = false, is_outer = false; + unsigned int semantic_index_max = 0; if (element->register_count == 0) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, @@ -7523,12 +7639,6 @@ static void vsir_validate_signature_element(struct validation_context *ctx, case VKD3D_SHADER_SV_INSTANCE_ID: case VKD3D_SHADER_SV_IS_FRONT_FACE: case VKD3D_SHADER_SV_SAMPLE_INDEX: - case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: - case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: - case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: - case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: case VKD3D_SHADER_SV_TARGET: case VKD3D_SHADER_SV_DEPTH: case VKD3D_SHADER_SV_COVERAGE: @@ -7537,6 +7647,37 @@ static void vsir_validate_signature_element(struct validation_context *ctx, case VKD3D_SHADER_SV_STENCIL_REF: break; + 
case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + semantic_index_max = 4; + is_outer = true; + break; + + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + semantic_index_max = 2; + is_outer = false; + break; + + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + semantic_index_max = 3; + is_outer = true; + break; + + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + semantic_index_max = 1; + is_outer = false; + break; + + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; + semantic_index_max = 2; + is_outer = true; + break; + default: validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid system value semantic %#x.", @@ -7544,6 +7685,38 @@ static void vsir_validate_signature_element(struct validation_context *ctx, break; } + if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + { + if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: System value semantic %#x is only valid " + "in the patch constant signature.", + idx, signature_type_name, element->sysval_semantic); + + if (ctx->program->tess_domain != expected_tess_domain) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", + idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); + + if (element->semantic_index >= semantic_index_max) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", + idx, signature_type_name, 
element->semantic_index, element->sysval_semantic); + } + else + { + unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; + + if (*idx_pos != ~0u) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", + idx, signature_type_name, element->semantic_index, element->sysval_semantic); + else + *idx_pos = idx; + } + } + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) { const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; @@ -7643,6 +7816,109 @@ static void vsir_validate_signature(struct validation_context *ctx, for (i = 0; i < signature->element_count; ++i) vsir_validate_signature_element(ctx, signature, signature_type, i); + + if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) + { + const struct signature_element *first_element, *element; + unsigned int expected_outer_count = 0; + unsigned int expected_inner_count = 0; + + switch (ctx->program->tess_domain) + { + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + expected_outer_count = 4; + expected_inner_count = 2; + break; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + expected_outer_count = 3; + expected_inner_count = 1; + break; + + case VKD3D_TESSELLATOR_DOMAIN_LINE: + expected_outer_count = 2; + expected_inner_count = 0; + break; + + default: + break; + } + + /* After I/O normalisation tessellation factors are merged in a single array. 
*/ + if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) + { + expected_outer_count = min(1, expected_outer_count); + expected_inner_count = min(1, expected_inner_count); + } + + first_element = NULL; + for (i = 0; i < expected_outer_count; ++i) + { + if (ctx->outer_tess_idxs[i] == ~0u) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Missing outer system value semantic %u.", i); + } + else + { + element = &signature->elements[ctx->outer_tess_idxs[i]]; + + if (!first_element) + { + first_element = element; + continue; + } + + if (element->register_index != first_element->register_index + i) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid register index %u for outer system value semantic %u, expected %u.", + element->register_index, i, first_element->register_index + i); + } + + if (element->mask != first_element->mask) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid mask %#x for outer system value semantic %u, expected %#x.", + element->mask, i, first_element->mask); + } + } + } + + first_element = NULL; + for (i = 0; i < expected_inner_count; ++i) + { + if (ctx->inner_tess_idxs[i] == ~0u) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Missing inner system value semantic %u.", i); + } + else + { + element = &signature->elements[ctx->inner_tess_idxs[i]]; + + if (!first_element) + { + first_element = element; + continue; + } + + if (element->register_index != first_element->register_index + i) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid register index %u for inner system value semantic %u, expected %u.", + element->register_index, i, first_element->register_index + i); + } + + if (element->mask != first_element->mask) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid mask %#x for inner system value semantic %u, expected %#x.", + element->mask, i, first_element->mask); + } + } + 
} + } } static const char *name_from_cf_type(enum vsir_control_flow_type type) @@ -7754,6 +8030,39 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, instruction->declaration.max_tessellation_factor); } +static void vsir_validate_dcl_input(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + /* Signature input registers. */ + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + case VKD3DSPR_PATCHCONST: + /* Non-signature input registers. */ + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT.", + instruction->declaration.dst.reg.type); + } +} + static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7763,6 +8072,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, instruction->declaration.primitive_type.type); } +static void vsir_validate_dcl_input_ps(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS.", + instruction->declaration.dst.reg.type); + } +} + +static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ 
+ switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS_SGV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_ps_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_sgv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_SGV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_PATCHCONST: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_output(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + /* Signature output registers. */ + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + /* Non-signature output registers. 
*/ + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_OUTPUT.", + instruction->declaration.dst.reg.type); + } +} + static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7772,6 +8180,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte instruction->declaration.count); } +static void vsir_validate_dcl_output_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_OUTPUT_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + static void vsir_validate_dcl_output_topology(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7801,6 +8225,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); + + if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", + instruction->declaration.tessellator_domain, ctx->program->tess_domain); } static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, @@ -8063,8 
+8492,16 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, + [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, + [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, + [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv}, + [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv}, + [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv}, + [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv}, + [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output}, [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, + [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv}, [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, @@ -8177,6 +8614,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, .invalid_instruction_idx = true, + .outer_tess_idxs[0] = ~0u, + .outer_tess_idxs[1] = ~0u, + .outer_tess_idxs[2] = ~0u, + .outer_tess_idxs[3] = ~0u, + .inner_tess_idxs[0] = ~0u, + .inner_tess_idxs[1] = ~0u, }; unsigned int i; @@ -8187,12 +8630,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c { case VKD3D_SHADER_TYPE_HULL: case VKD3D_SHADER_TYPE_DOMAIN: + if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID + || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Invalid tessellation domain 
%#x.", program->tess_domain); break; default: if (program->patch_constant_signature.element_count != 0) validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Patch constant signature is only valid for hull and domain shaders."); + + if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Invalid tessellation domain %#x.", program->tess_domain); } switch (program->shader_version.type) diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index df3edeaa4e6..29f51088728 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -153,6 +153,64 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, msl_print_register_datatype(buffer, gen, reg->data_type); break; + case VKD3DSPR_IMMCONST: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + switch (reg->data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); + vkd3d_string_buffer_printf(buffer, "", reg->data_type); + break; + } + break; + + case VSIR_DIMENSION_VEC4: + switch (reg->data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + 
vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); + vkd3d_string_buffer_printf(buffer, "", reg->data_type); + break; + } + break; + + default: + vkd3d_string_buffer_printf(buffer, "", reg->dimension); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + break; + } + break; + case VKD3DSPR_CONSTBUFFER: if (reg->idx_count != 3) { @@ -215,19 +273,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) { const struct vkd3d_shader_register *reg = &vsir_src->reg; + struct vkd3d_string_buffer *str; msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); if (reg->non_uniform) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled 'non-uniform' modifier."); - if (vsir_src->modifiers) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - msl_print_register_name(msl_src->str, gen, reg); + if (!vsir_src->modifiers) + str = msl_src->str; + else + str = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(str, gen, reg); if (reg->dimension == VSIR_DIMENSION_VEC4) - msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); + msl_print_swizzle(str, vsir_src->swizzle, mask); + + switch (vsir_src->modifiers) + { + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_NEG: + vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer); + break; + case VKD3DSPSM_ABS: + 
vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer); + break; + default: + vkd3d_string_buffer_printf(msl_src->str, "(%s)", + vsir_src->modifiers, str->buffer); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + break; + } + + if (str != msl_src->str) + vkd3d_string_buffer_release(&gen->string_buffers, str); } static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) @@ -288,6 +370,80 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } +static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct msl_src src[2]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + + msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + msl_print_assignment(gen, &dst, "%s(%s)", op, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + unsigned int mask_size; + struct msl_src src[2]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, 
&ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))", + mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer); + else + msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", + src[0].str->buffer, op, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) +{ + unsigned int component_count; + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); + else + msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { struct msl_src src; @@ -303,6 +459,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc msl_dst_cleanup(&dst, &gen->string_buffers); } +static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + unsigned int component_count; + struct msl_src src[3]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + msl_src_init(&src[2], gen, &ins->src[2], mask); + + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + 
msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))", + src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); + else + msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))", + src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); + + msl_src_cleanup(&src[2], &gen->string_buffers); + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { msl_print_indent(gen->buffer, gen->indent); @@ -315,17 +496,61 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d switch (ins->opcode) { + case VKD3DSIH_ADD: + msl_binop(gen, ins, "+"); + break; + case VKD3DSIH_AND: + msl_binop(gen, ins, "&"); + break; case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_NOP: break; + case VKD3DSIH_DIV: + msl_binop(gen, ins, "/"); + break; + case VKD3DSIH_FRC: + msl_intrinsic(gen, ins, "fract"); + break; + case VKD3DSIH_FTOI: + msl_cast(gen, ins, "int"); + break; + case VKD3DSIH_FTOU: + msl_cast(gen, ins, "uint"); + break; + case VKD3DSIH_GEO: + msl_relop(gen, ins, ">="); + break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: + msl_relop(gen, ins, "!="); + break; + case VKD3DSIH_ITOF: + case VKD3DSIH_UTOF: + msl_cast(gen, ins, "float"); + break; case VKD3DSIH_MOV: msl_mov(gen, ins); break; + case VKD3DSIH_MOVC: + msl_movc(gen, ins); + break; + case VKD3DSIH_MUL: + msl_binop(gen, ins, "*"); + break; + case VKD3DSIH_OR: + msl_binop(gen, ins, "|"); + break; case VKD3DSIH_RET: msl_ret(gen, ins); break; + case VKD3DSIH_ROUND_PI: + msl_intrinsic(gen, ins, "ceil"); + break; + case VKD3DSIH_ROUND_Z: + msl_intrinsic(gen, ins, "trunc"); + break; default: msl_unhandled(gen, ins); break; @@ -790,6 +1015,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader MESSAGE("Generating a MSL 
shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + vkd3d_string_buffer_printf(gen->buffer, "#include \n\n"); + vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); if (gen->program->global_flags) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 4a8d0fddae1..d167415c356 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index bdfd632ad12..005b40a9d1f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -2471,7 +2471,6 @@ struct spirv_compiler bool emit_point_size; enum vkd3d_shader_opcode phase; - bool emit_default_control_point_phase; struct vkd3d_shader_phase control_point_phase; struct vkd3d_shader_phase patch_constant_phase; @@ -6822,15 +6821,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler compiler->spirv_builder.invocation_count = instruction->declaration.count; } -static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, + enum vkd3d_tessellator_domain domain) { - enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; SpvExecutionMode mode; - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) - return; - switch (domain) { case VKD3D_TESSELLATOR_DOMAIN_LINE: @@ -6916,15 +6911,10 @@ static void spirv_compiler_emit_thread_group_size(struct 
spirv_compiler *compile SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); } -static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); - static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) - spirv_compiler_emit_default_control_point_phase(compiler); - vkd3d_spirv_build_op_function_end(builder); if (is_in_control_point_phase(compiler)) @@ -6969,9 +6959,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, phase->function_id = function_id; /* The insertion location must be set after the label is emitted. */ phase->function_location = 0; - - if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - compiler->emit_default_control_point_phase = instruction->flags; } static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) @@ -7000,63 +6987,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) } } -static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->output_signature; - const struct shader_signature *input_signature = &compiler->input_signature; - uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; - struct vkd3d_shader_src_param invocation; - struct vkd3d_shader_register input_reg; - unsigned int component_count; - unsigned int i; - - vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - spirv_compiler_initialise_block(compiler); - invocation_id = spirv_compiler_emit_load_invocation_id(compiler); - - memset(&invocation, 0, sizeof(invocation)); - vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, 
VKD3D_DATA_INT, 0); - invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; - - vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2); - input_reg.idx[0].offset = 0; - input_reg.idx[0].rel_addr = &invocation; - input_reg.idx[1].offset = 0; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *output = &output_signature->elements[i]; - const struct signature_element *input = &input_signature->elements[i]; - struct vkd3d_shader_register_info output_reg_info; - struct vkd3d_shader_register output_reg; - - VKD3D_ASSERT(input->mask == output->mask); - VKD3D_ASSERT(input->component_type == output->component_type); - - input_reg.idx[1].offset = i; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); - output_reg.idx[0].offset = i; - spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info); - - component_type = output->component_type; - component_count = vsir_write_mask_component_count(output->mask); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - - dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id); - - vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); - } - - vkd3d_spirv_build_op_return(builder); -} - static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) { @@ -10239,9 +10169,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: spirv_compiler_emit_output_vertex_count(compiler, 
instruction); break; - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); - break; case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: spirv_compiler_emit_tessellator_output_primitive(compiler, instruction->declaration.tessellator_output_primitive); @@ -10743,6 +10670,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count; + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) + spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 9c41e2c2053..409328b2e53 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -707,6 +707,7 @@ struct vkd3d_sm4_opcode_info char src_info[SM4_MAX_SRC_COUNT]; void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); + bool is_conditional_op; }; static const enum vkd3d_primitive_type output_primitive_type_table[] = @@ -1268,6 +1269,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi { ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_domain = ins->declaration.tessellator_domain; } static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1439,18 +1441,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, {VKD3D_SM4_OP_BREAK, 
VKD3DSIH_BREAK, "", ""}, {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", shader_sm4_read_case_condition}, {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, @@ -1468,7 +1470,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, @@ -1502,7 +1504,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, @@ -1967,16 +1969,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t return lookup->register_type_info_from_vkd3d[vkd3d_type]; } -static enum vkd3d_sm4_swizzle_type 
vkd3d_sm4_get_default_swizzle_type( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) -{ - const struct vkd3d_sm4_register_type_info *register_type_info = - get_info_from_vkd3d_register_type(lookup, vkd3d_type); - - VKD3D_ASSERT(register_type_info); - return register_type_info->default_src_swizzle_type; -} - static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) { @@ -2993,8 +2985,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; } -static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { @@ -3331,6 +3321,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } @@ -4096,297 +4087,6 @@ struct sm4_instruction unsigned int idx_src_count; }; -static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, - const struct hlsl_ir_node *instr) -{ - VKD3D_ASSERT(instr->reg.allocated); - reg->type = VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; -} - -static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, - enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, - struct sm4_instruction *sm4_instr) -{ - const struct hlsl_ir_var *var = deref->var; - unsigned int offset_const_deref; - - reg->type = type; - reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - reg->dimension = VSIR_DIMENSION_VEC4; - - 
VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - - if (!var->indexable) - { - offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); - reg->idx[0].offset += offset_const_deref / 4; - reg->idx_count = 1; - } - else - { - offset_const_deref = deref->const_offset; - reg->idx[1].offset = offset_const_deref / 4; - reg->idx_count = 2; - - if (deref->rel_offset.node) - { - struct vkd3d_shader_src_param *idx_src; - unsigned int idx_writemask; - - VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); - idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; - memset(idx_src, 0, sizeof(*idx_src)); - - reg->idx[1].rel_addr = idx_src; - sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); - VKD3D_ASSERT(idx_writemask != 0); - idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); - } - } - - *writemask = 0xf & (0xf << (offset_const_deref % 4)); - if (var->regs[HLSL_REGSET_NUMERIC].writemask) - *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); -} - -static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); - const struct hlsl_ir_var *var = deref->var; - struct hlsl_ctx *ctx = tpf->ctx; - - if (var->is_uniform) - { - enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); - - if (regset == HLSL_REGSET_TEXTURES) - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = VSIR_DIMENSION_VEC4; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ - reg->idx_count = 2; - } - else - { - reg->idx[0].offset = 
var->regs[HLSL_REGSET_TEXTURES].index; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } - VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ - reg->idx_count = 2; - } - else - { - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } - VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ - reg->idx_count = 2; - } - else - { - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } - VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - - VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->buffer->reg.id; - reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ - reg->idx[2].offset = offset / 4; - reg->idx_count = 3; - } - else - { - reg->idx[0].offset = var->buffer->reg.index; - reg->idx[1].offset = offset / 4; - reg->idx_count = 2; - } - *writemask = ((1u << data_type->dimx) - 1) << 
(offset & 3); - } - } - else if (var->is_input_semantic) - { - bool has_idx; - - if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - if (shader_sm4_is_scalar_register(reg)) - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - VKD3D_ASSERT(hlsl_reg.allocated); - - if (version->type == VKD3D_SHADER_TYPE_DOMAIN) - reg->type = VKD3DSPR_PATCHCONST; - else - reg->type = VKD3DSPR_INPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else if (var->is_output_semantic) - { - bool has_idx; - - if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - if (shader_sm4_is_scalar_register(reg)) - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - VKD3D_ASSERT(hlsl_reg.allocated); - reg->type = VKD3DSPR_OUTPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else - { - enum vkd3d_shader_register_type type = deref->var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; - - sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); - } -} - -static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) -{ - unsigned int hlsl_swizzle; - uint32_t writemask; - - sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); - if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) - { - hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - src->swizzle = swizzle_from_sm4(hlsl_swizzle); - } -} - -static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) -{ - sm4_register_from_node(&dst->reg, &dst->write_mask, instr); -} - -static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, - const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) -{ - src->swizzle = 0; - src->reg.type = VKD3DSPR_IMMCONST; - if (width == 1) - { - src->reg.dimension = VSIR_DIMENSION_SCALAR; - src->reg.u.immconst_u32[0] = value->u[0].u; - } - else - { - unsigned int i, j = 0; - - src->reg.dimension = VSIR_DIMENSION_VEC4; - for (i = 0; i < 4; ++i) - { - if ((map_writemask & (1u << i)) && (j < width)) - src->reg.u.immconst_u32[i] = value->u[j++].u; - else - src->reg.u.immconst_u32[i] = 0; - } - } -} - -static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_ir_node *instr, uint32_t map_writemask) -{ - unsigned int hlsl_swizzle; - uint32_t writemask; - - if (instr->type == HLSL_IR_CONSTANT) - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - - sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); - return; - } - - sm4_register_from_node(&src->reg, &writemask, instr); - if 
(vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) - { - hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - src->swizzle = swizzle_from_sm4(hlsl_swizzle); - } -} - static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, unsigned int i) { @@ -4650,33 +4350,6 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s sm4_update_stat_counters(tpf, instr); } -static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction_modifier modif; - struct hlsl_ir_constant *offset; - - if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) - return false; - offset = hlsl_ir_constant(texel_offset); - - modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; - modif.u.aoffimmi.u = offset->value.u[0].i; - modif.u.aoffimmi.v = 0; - modif.u.aoffimmi.w = 0; - if (offset->node.data_type->dimx > 1) - modif.u.aoffimmi.v = offset->value.u[1].i; - if (offset->node.data_type->dimx > 2) - modif.u.aoffimmi.w = offset->value.u[2].i; - if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 - || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 - || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) - return false; - - instr->modifiers[instr->modifier_count++] = modif; - return true; -} - static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) { size_t size = (cbuffer->used_size + 3) / 4; @@ -4945,26 +4618,6 @@ static void tpf_write_hs_decls(const struct tpf_compiler *tpf) write_sm4_instruction(tpf, &instr); } -static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, - }; - - write_sm4_instruction(tpf, &instr); -} - -static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) -{ - struct sm4_instruction instr = - { 
- .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, - }; - - write_sm4_instruction(tpf, &instr); -} - static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) { struct sm4_instruction instr = @@ -5022,594 +4675,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler write_sm4_instruction(tpf, &instr); } -static void write_sm4_ret(const struct tpf_compiler *tpf) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, - enum hlsl_sampler_dim dim) -{ - const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); - bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; - unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - if (uav) - instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; - else if (raw) - instr.opcode = VKD3D_SM5_OP_LD_RAW; - else - instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - if (!uav) - { - /* Mipmap level is in the last component in the IR, but needs to be in the W - * component in the instruction. */ - unsigned int dim_count = hlsl_sampler_dim_count(dim); - - if (dim_count == 1) - coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; - if (dim_count == 2) - coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; - } - - sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask); - - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); - - instr.src_count = 2; - - if (multisampled) - { - if (sample_index->type == HLSL_IR_CONSTANT) - { - struct vkd3d_shader_register *reg = &instr.srcs[2].reg; - struct hlsl_ir_constant *index; - - index = hlsl_ir_constant(sample_index); - - memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); - reg->type = VKD3DSPR_IMMCONST; - reg->dimension = VSIR_DIMENSION_SCALAR; - reg->u.immconst_u32[0] = index->value.u[0].u; - } - else if (version->major == 4 && version->minor == 0) - { - hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); - } - else - { - sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0); - } - - ++instr.src_count; - } - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_deref *sampler = 
&load->sampler; - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - switch (load->load_type) - { - case HLSL_RESOURCE_SAMPLE: - instr.opcode = VKD3D_SM4_OP_SAMPLE; - break; - - case HLSL_RESOURCE_SAMPLE_CMP: - instr.opcode = VKD3D_SM4_OP_SAMPLE_C; - break; - - case HLSL_RESOURCE_SAMPLE_CMP_LZ: - instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; - break; - - case HLSL_RESOURCE_SAMPLE_LOD: - instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; - break; - - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - instr.opcode = VKD3D_SM4_OP_SAMPLE_B; - break; - - case HLSL_RESOURCE_SAMPLE_GRAD: - instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; - break; - - default: - vkd3d_unreachable(); - } - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); - sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); - instr.src_count = 3; - - if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD - || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) - { - sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); - ++instr.src_count; - } - else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) - { - sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); - instr.src_count += 2; - } - else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP - || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) - { - sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); - 
++instr.src_count; - } - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - - VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; - if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - - if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER - || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); - return; - } - - VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_RESINFO; - if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -static 
void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_IF, - .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - - VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &iff->else_block); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) -{ - struct sm4_instruction instr = {0}; - - switch (jump->type) - { - case HLSL_IR_JUMP_BREAK: - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - - case HLSL_IR_JUMP_CONTINUE: - instr.opcode = VKD3D_SM4_OP_CONTINUE; - break; - - case HLSL_IR_JUMP_DISCARD_NZ: - { - instr.opcode = VKD3D_SM4_OP_DISCARD; - instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.src_count = 1; - sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; - } - - case HLSL_IR_JUMP_RETURN: - vkd3d_unreachable(); - - default: - hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; - } - - write_sm4_instruction(tpf, &instr); -} - -/* Does this variable's data come directly from the API user, rather than being - * temporary or from a previous shader stage? - * I.e. is it a uniform or VS input? 
*/ -static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) -{ - if (var->is_uniform) - return true; - - return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; -} - -static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_type *type = load->node.data_type; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - sm4_dst_from_node(&instr.dsts[0], &load->node); - instr.dst_count = 1; - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) - { - struct hlsl_constant_value value; - - /* Uniform bools can be specified as anything, but internal bools always - * have 0 for false and ~0 for true. Normalize that here. */ - - instr.opcode = VKD3D_SM4_OP_MOVC; - - sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); - - memset(&value, 0xff, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); - memset(&value, 0, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); - instr.src_count = 3; - } - else - { - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); - instr.src_count = 1; - } - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_LOOP, - }; - - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &loop->body); - - instr.opcode = VKD3D_SM4_OP_ENDLOOP; - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref 
*resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - struct vkd3d_shader_src_param *src; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - instr.opcode = VKD3D_SM4_OP_GATHER4; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - if (!vkd3d_shader_ver_ge(version, 5, 0)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } - instr.opcode = VKD3D_SM5_OP_GATHER4_PO; - sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); - } - } - - sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr); - - src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); - src->reg.dimension = VSIR_DIMENSION_VEC4; - src->swizzle = swizzle; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = load->sample_index.node; - const struct hlsl_ir_node *coords = load->coords.node; - - if (load->sampler.var && !load->sampler.var->is_uniform) - { - hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - - if (!load->resource.var->is_uniform) - { - hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case 
HLSL_RESOURCE_LOAD: - write_sm4_ld(tpf, &load->node, &load->resource, - coords, sample_index, texel_offset, load->sampling_dim); - break; - - case HLSL_RESOURCE_SAMPLE: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_GRAD: - /* Combined sample expressions were lowered. */ - VKD3D_ASSERT(load->sampler.var); - write_sm4_sample(tpf, load); - break; - - case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE_INFO: - write_sm4_sampleinfo(tpf, load); - break; - - case HLSL_RESOURCE_RESINFO: - write_sm4_resinfo(tpf, load); - break; - - case HLSL_RESOURCE_SAMPLE_PROJ: - vkd3d_unreachable(); - } -} - -static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) -{ - struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); - struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; - struct sm4_instruction instr; - - if (!store->resource.var->is_uniform) - { - hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); - return; - } - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not 
implemented."); - return; - } - - memset(&instr, 0, sizeof(instr)); - - sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr); - instr.dst_count = 1; - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) - { - instr.opcode = VKD3D_SM5_OP_STORE_RAW; - instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx); - } - else - { - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - } - - sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) -{ - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; - uint32_t writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); - instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) -{ - const struct hlsl_ir_node *selector = s->selector.node; - struct hlsl_ir_switch_case *c; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SWITCH; - - sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); - - LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) - { - memset(&instr, 0, sizeof(instr)); - if (c->is_default) - { - instr.opcode = VKD3D_SM4_OP_DEFAULT; - } - else - { - struct hlsl_constant_value value = { .u[0].u = c->value }; - - instr.opcode = 
VKD3D_SM4_OP_CASE; - sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 1; - } - - write_sm4_instruction(tpf, &instr); - write_sm4_block(tpf, &c->body); - } - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_ENDSWITCH; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) -{ - unsigned int hlsl_swizzle; - struct sm4_instruction instr; - uint32_t writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &swizzle->node); - instr.dst_count = 1; - - sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); - hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); - instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { + struct sm4_instruction_modifier *modifier; const struct vkd3d_sm4_opcode_info *info; struct sm4_instruction instr = {0}; unsigned int dst_count, src_count; @@ -5655,6 +4723,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ for (unsigned int i = 0; i < ins->src_count; ++i) instr.srcs[i] = ins->src[i]; + if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w) + { + VKD3D_ASSERT(instr.modifier_count < ARRAY_SIZE(instr.modifiers)); + modifier = &instr.modifiers[instr.modifier_count++]; + modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI; + modifier->u.aoffimmi.u = ins->texel_offset.u; + modifier->u.aoffimmi.v = ins->texel_offset.v; + modifier->u.aoffimmi.w = ins->texel_offset.w; + } + + if (info->is_conditional_op) + { + if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ) + instr.extra_bits 
= VKD3D_SM4_CONDITIONAL_NZ; + } + write_sm4_instruction(tpf, &instr); } @@ -5704,6 +4788,11 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_ADD: case VKD3DSIH_AND: + case VKD3DSIH_BREAK: + case VKD3DSIH_CASE: + case VKD3DSIH_CONTINUE: + case VKD3DSIH_DEFAULT: + case VKD3DSIH_DISCARD: case VKD3DSIH_DIV: case VKD3DSIH_DP2: case VKD3DSIH_DP3: @@ -5714,6 +4803,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_DSY: case VKD3DSIH_DSY_COARSE: case VKD3DSIH_DSY_FINE: + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDSWITCH: case VKD3DSIH_EQO: case VKD3DSIH_EXP: case VKD3DSIH_F16TOF32: @@ -5721,9 +4814,14 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_FRC: case VKD3DSIH_FTOI: case VKD3DSIH_FTOU: + case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_PO: case VKD3DSIH_GEO: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_IADD: case VKD3DSIH_IEQ: + case VKD3DSIH_IF: case VKD3DSIH_IGE: case VKD3DSIH_ILT: case VKD3DSIH_IMAD: @@ -5735,7 +4833,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_ISHL: case VKD3DSIH_ISHR: case VKD3DSIH_ITOF: + case VKD3DSIH_LD: + case VKD3DSIH_LD2DMS: + case VKD3DSIH_LD_RAW: + case VKD3DSIH_LD_UAV_TYPED: case VKD3DSIH_LOG: + case VKD3DSIH_LOOP: case VKD3DSIH_LTO: case VKD3DSIH_MAD: case VKD3DSIH_MAX: @@ -5747,14 +4850,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_NOT: case VKD3DSIH_OR: case VKD3DSIH_RCP: + case VKD3DSIH_RESINFO: + case VKD3DSIH_RET: case VKD3DSIH_ROUND_NE: case VKD3DSIH_ROUND_NI: case VKD3DSIH_ROUND_PI: case VKD3DSIH_ROUND_Z: case VKD3DSIH_RSQ: + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_GRAD: case VKD3DSIH_SAMPLE_INFO: + case 
VKD3DSIH_SAMPLE_LOD: case VKD3DSIH_SINCOS: case VKD3DSIH_SQRT: + case VKD3DSIH_STORE_RAW: + case VKD3DSIH_STORE_UAV_TYPED: + case VKD3DSIH_SWITCH: case VKD3DSIH_UDIV: case VKD3DSIH_UGE: case VKD3DSIH_ULT: @@ -5772,90 +4886,15 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ } } -static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) +static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program) { - const struct hlsl_ir_node *instr; - unsigned int vsir_instr_idx; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", - instr->data_type->class); - break; - } - - if (!instr->reg.allocated) - { - VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); - continue; - } - } - - switch (instr->type) - { - case HLSL_IR_CALL: - case HLSL_IR_CONSTANT: - vkd3d_unreachable(); - - case HLSL_IR_IF: - write_sm4_if(tpf, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: - write_sm4_jump(tpf, hlsl_ir_jump(instr)); - break; - - case HLSL_IR_LOAD: - write_sm4_load(tpf, hlsl_ir_load(instr)); - break; - - case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: - write_sm4_loop(tpf, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: - write_sm4_store(tpf, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWITCH: - write_sm4_switch(tpf, hlsl_ir_switch(instr)); - break; - - case HLSL_IR_SWIZZLE: - write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); - break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; - tpf_handle_instruction(tpf, 
&tpf->program->instructions.elements[vsir_instr_idx]); - break; - - default: - hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} + unsigned int i; -static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) -{ if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); - write_sm4_block(tpf, &func->body); - - write_sm4_ret(tpf); + for (i = 0; i < program->instructions.count; ++i) + tpf_handle_instruction(tpf, &program->instructions.elements[i]); } static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) @@ -5944,16 +4983,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec write_sm4_dcl_textures(tpf, resource, true); } - if (version->type == VKD3D_SHADER_TYPE_HULL) - tpf_write_hs_control_point_phase(tpf); - - tpf_write_shader_function(tpf, entry_func); - - if (version->type == VKD3D_SHADER_TYPE_HULL) - { - tpf_write_hs_fork_phase(tpf); - tpf_write_shader_function(tpf, ctx->patch_constant_func); - } + tpf_write_program(tpf, tpf->program); set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index d751f2dc6bf..db61eec8f28 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; size_t combined_samplers_size; + + enum vkd3d_shader_tessellator_output_primitive output_primitive; + enum vkd3d_shader_tessellator_partitioning partitioning; }; static 
VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, @@ -1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, instruction->declaration.structured_resource.byte_stride, false, instruction->flags); break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + context->output_primitive = instruction->declaration.tessellator_output_primitive; + break; + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + context->partitioning = instruction->declaration.tessellator_partitioning; + break; case VKD3DSIH_IF: case VKD3DSIH_IFC: cf_info = vkd3d_shader_scan_push_cf_info(context); @@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) { struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; struct vkd3d_shader_scan_descriptor_info *descriptor_info; struct vkd3d_shader_scan_signature_info *signature_info; @@ -1530,6 +1542,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh descriptor_info1 = &local_descriptor_info1; } + tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, descriptor_info1, combined_sampler_info, message_context); @@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh if (!ret && descriptor_info) ret = convert_descriptor_info(descriptor_info, descriptor_info1); + if (!ret && tessellation_info) + { + tessellation_info->output_primitive = context.output_primitive; + tessellation_info->partitioning = context.partitioning; + } + if (ret < 
0) { if (combined_sampler_info) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index be7c0b73a22..ad04972b3fb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1431,6 +1431,7 @@ struct vsir_program uint8_t diffuse_written_mask; enum vsir_control_flow_type cf_type; enum vsir_normalisation_level normalisation_level; + enum vkd3d_tessellator_domain tess_domain; const char **block_names; size_t block_name_count; -- 2.45.2