From 6d0c679f28b976983d6ea925d9adc5dff37bcb56 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 24 Jun 2025 13:36:03 +1000 Subject: [PATCH] Updated vkd3d to d8edf20c2b4224384d8e206c620bfbd61c56219d. --- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 5 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 + libs/vkd3d/libs/vkd3d-shader/fx.c | 574 +++++----- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 16 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 7 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 129 ++- libs/vkd3d/libs/vkd3d-shader/ir.c | 1009 ++++++++++++++++- libs/vkd3d/libs/vkd3d-shader/msl.c | 259 ++++- libs/vkd3d/libs/vkd3d-shader/spirv.c | 10 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 + .../libs/vkd3d-shader/vkd3d_shader_private.h | 10 + 11 files changed, 1680 insertions(+), 344 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index fbc0235cdd0..f19a6283197 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -2137,6 +2137,9 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; int result; + if ((result = vsir_allocate_temp_registers(program, message_context))) + return result; + d3dbc.program = program; d3dbc.message_context = message_context; switch (version->type) @@ -2156,7 +2159,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, } put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab); + d3dbc_write_comment(&d3dbc, TAG_CTAB, ctab); d3dbc_write_semantic_dcls(&d3dbc); d3dbc_write_program_instructions(&d3dbc); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 9ebcb6870e9..71fa81ec163 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -5180,6 +5180,8 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type); if (data_type_is_64_bit(src_param->reg.data_type)) src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); + else + register_convert_to_minimum_precision(&src_param->reg); instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); } diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 95a172fd827..0ab1a676400 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -251,6 +251,8 @@ struct fx_write_context_ops void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_state_assignment)(const struct hlsl_ir_var *var, + struct hlsl_state_block_entry *entry, struct fx_write_context *fx); bool are_child_effects_supported; }; @@ -313,6 +315,15 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) fx->ops->write_pass(var, fx); } +static void write_state_assignment(const struct hlsl_ir_var *var, + struct hlsl_state_block_entry *entry, struct fx_write_context *fx) +{ + fx->ops->write_state_assignment(var, entry, fx); +} + +static uint32_t write_state_block(struct hlsl_ir_var *var, + unsigned int block_index, struct fx_write_context *fx); + static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) { struct hlsl_ctx *ctx = fx->ctx; @@ -348,8 +359,6 @@ static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_con static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); static const char * get_fx_4_type_name(const struct hlsl_type *type); static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); -static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, - uint32_t count_offset, struct fx_write_context *fx); static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { @@ -502,17 +511,22 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f return string_entry->offset; } +static void fx_4_decompose_state_blocks(struct hlsl_ir_var *var, struct fx_write_context *fx); + static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t name_offset, count_offset; + uint32_t name_offset, count_offset, count; name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); count_offset = put_u32(buffer, 0); + fx_4_decompose_state_blocks(var, fx); + write_fx_4_annotations(var->annotations, fx); - write_fx_4_state_block(var, 0, count_offset, fx); + count = write_state_block(var, 0, fx); + set_u32(buffer, count_offset, count); } static void write_fx_2_annotations(struct hlsl_ir_var *var, uint32_t count_offset, struct fx_write_context *fx) @@ -775,9 +789,10 @@ static const struct rhs_named_value fx_2_filter_values[] = { NULL } }; -struct fx_2_state +struct fx_state { const char *name; + enum hlsl_type_class container; enum hlsl_type_class class; enum state_property_component_type type; unsigned int dimx; @@ -786,215 +801,215 @@ struct fx_2_state const struct rhs_named_value *values; }; -static const struct fx_2_state fx_2_pass_states[] = -{ - { "ZEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 0, fx_2_zenable_values }, - { "FillMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 1, fx_2_fillmode_values }, - { "ShadeMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 2, fx_2_shademode_values }, - { "ZWriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 3 }, - { "AlphaTestEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 4 }, - { "LastPixel", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 5 }, - { "SrcBlend", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 6, fx_2_blendmode_values }, - { "DestBlend", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 7, fx_2_blendmode_values }, - { "CullMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 8, fx_2_cullmode_values }, - { "ZFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9, fx_2_cmpfunc_values }, - { "AlphaRef", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 10 }, - { "AlphaFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11, fx_2_cmpfunc_values }, - { "DitherEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12 }, - { "AlphaBlendEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13 }, - { "FogEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 14 }, - { "SpecularEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, - { "FogColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 16 }, - { "FogTableMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 17, fx_2_fogmode_values }, - { "FogStart", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 18 }, - { "FogEnd", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 19 }, - { "FogDensity", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 20 }, - { "RangeFogEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 21 }, - { "StencilEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 22 }, - { "StencilFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, fx_2_stencilcaps_values }, - { "StencilZFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, fx_2_stencilcaps_values }, - { "StencilPass", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 25, fx_2_stencilcaps_values }, - { "StencilFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 26, fx_2_cmpfunc_values }, - { "StencilRef", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 27 }, - { "StencilMask", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28 }, - { "StencilWriteMask", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29 }, - { "TextureFactor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30 }, - { "Wrap0", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, fx_2_wrap_values }, - { "Wrap1", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, fx_2_wrap_values }, - { "Wrap2", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, fx_2_wrap_values }, - { "Wrap3", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, fx_2_wrap_values }, - { "Wrap4", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, fx_2_wrap_values }, - { "Wrap5", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 36, fx_2_wrap_values }, - { "Wrap6", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 37, fx_2_wrap_values }, - { "Wrap7", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, fx_2_wrap_values }, - { "Wrap8", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, fx_2_wrap_values }, - { "Wrap9", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, fx_2_wrap_values }, - { "Wrap10", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, fx_2_wrap_values }, - { "Wrap11", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, fx_2_wrap_values }, - { "Wrap12", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, fx_2_wrap_values }, - { "Wrap13", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 44, fx_2_wrap_values }, - { "Wrap14", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, fx_2_wrap_values }, - { "Wrap15", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, fx_2_wrap_values }, - { "Clipping", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47 }, - { "Lighting", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48 }, - { "Ambient", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 49 }, - { "FogVertexMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50, fx_2_fogmode_values }, - { "ColorVertex", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51 }, - { "LocalViewer", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 52 }, - { "NormalizeNormals", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 53 }, - - { "DiffuseMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 54, fx_2_materialcolorsource_values }, - { "SpecularMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 55, fx_2_materialcolorsource_values }, - { "AmbientMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 56, fx_2_materialcolorsource_values }, - { "EmissiveMaterialSource", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 57, fx_2_materialcolorsource_values }, - - { "VertexBlend", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 58, fx_2_vertexblend_values }, - { "ClipPlaneEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 59, fx_2_clipplane_values }, - { "PointSize", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 60 }, - { "PointSize_Min", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 61 }, - { "PointSize_Max", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 62 }, - { "PointSpriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 63 }, - { "PointScaleEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 64 }, - { "PointScale_A", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 65 }, - { "PointScale_B", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 66 }, - { "PointScale_C", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 67 }, - - { "MultiSampleAntialias", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 68 }, - { "MultiSampleMask", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 69 }, - { "PatchEdgeStyle", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 70, fx_2_patchedgestyle_values }, - { "DebugMonitorToken", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 71 }, - { "IndexedVertexBlendEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 72 }, - { "ColorWriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 73, fx_2_colorwriteenable_values }, - { "TweenFactor", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 74 }, - { "BlendOp", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 75, fx_2_blendop_values }, - { "PositionDegree", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 76, fx_2_degree_values }, - { "NormalDegree", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 77, fx_2_degree_values }, - { "ScissorTestEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 78 }, - { "SlopeScaleDepthBias", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 79 }, - - { "AntialiasedLineEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 80 }, - { "MinTessellationLevel", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 81 }, - { "MaxTessellationLevel", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 82 }, - { "AdaptiveTess_X", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 83 }, - { "AdaptiveTess_Y", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 84 }, - { "AdaptiveTess_Z", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 85 }, - { "AdaptiveTess_W", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 86 }, - { "EnableAdaptiveTessellation",HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 87 }, - { "TwoSidedStencilMode", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 88 }, - { "StencilFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 89, fx_2_stencilcaps_values }, - { "StencilZFail", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 90, fx_2_stencilcaps_values }, - { "StencilPass", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 91, fx_2_stencilcaps_values }, - { "StencilFunc", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 92, fx_2_cmpfunc_values }, - - { "ColorWriteEnable1", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 93, fx_2_colorwriteenable_values }, - { "ColorWriteEnable2", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 94, fx_2_colorwriteenable_values }, - { "ColorWriteEnable3", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 95, fx_2_colorwriteenable_values }, - { "BlendFactor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 96 }, - { "SRGBWriteEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 97 }, - { "DepthBias", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 98 }, - { "SeparateAlphaBlendEnable", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 99 }, - { "SrcBlendAlpha", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 100, fx_2_blendmode_values }, - { "DestBlendAlpha", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 101, fx_2_blendmode_values }, - { "BlendOpAlpha", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 102, fx_2_blendmode_values }, - - { "ColorOp", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 103, fx_2_textureop_values }, - { "ColorArg0", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 104, fx_2_colorarg_values }, - { "ColorArg1", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 105, fx_2_colorarg_values }, - { "ColorArg2", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 106, fx_2_colorarg_values }, - { "AlphaOp", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 107, fx_2_textureop_values }, - { "AlphaArg0", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 108, fx_2_colorarg_values }, - { "AlphaArg1", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 109, fx_2_colorarg_values }, - { "AlphaArg2", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 110, fx_2_colorarg_values }, - { "ResultArg", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 111, fx_2_colorarg_values }, - { "BumpEnvMat00", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 112 }, - { "BumpEnvMat01", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 113 }, - { "BumpEnvMat10", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 114 }, - { "BumpEnvMat11", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 115 }, - { "TexCoordIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 116 }, - { "BumpEnvLScale", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 117 }, - { "BumpEnvLOffset", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 118 }, - { "TextureTransformFlags", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 119, fx_2_texturetransform_values }, - { "Constant", HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 120 }, - { "PatchSegments", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 121 }, - { "FVF", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 122 }, - - { "ProjectionTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 123 }, - { "ViewTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 124 }, - { "WorldTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 256, 125 }, - { "TextureTransform", HLSL_CLASS_MATRIX, FX_FLOAT, 4, 8, 126 }, - - { "MaterialAmbient", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 127 }, - { "MaterialDiffuse", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 128 }, - { "MaterialSpecular", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 129 }, - { "MaterialEmissive", HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 130 }, - { "MaterialPower", HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 131 }, - - { "LightType", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 132, fx_2_lighttype_values }, - { "LightDiffuse", HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 133 }, - { "LightSpecular", HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 134 }, - { "LightAmbient", HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 135 }, - { "LightPosition", HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 136 }, - { "LightDirection", HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 137 }, - { "LightRange", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 138 }, - { "LightFalloff", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 139 }, - { "LightAttenuation0", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 140 }, - { "LightAttenuation1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 141 }, - { "LightAttenuation2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 142 }, - { "LightTheta", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 143 }, - { "LightPhi", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 144 }, - { "LightEnable", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 145 }, - - { "VertexShader", HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 }, - { "PixelShader", HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 147 }, - - { "VertexShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 148 }, - { "VertexShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 149 }, - { "VertexShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 150 }, - { "VertexShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 151 }, - { "VertexShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 152 }, - { "VertexShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 153 }, - { "VertexShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 154 }, - { "VertexShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 155 }, - - { "PixelShaderConstantF", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 156 }, - { "PixelShaderConstantB", HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 157 }, - { "PixelShaderConstantI", HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 158 }, - { "PixelShaderConstant", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 159 }, - { "PixelShaderConstant1", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 160 }, - { "PixelShaderConstant2", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 161 }, - { "PixelShaderConstant3", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 162 }, - { "PixelShaderConstant4", HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 163 }, - - { "Texture", HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 261, 164 }, - { "AddressU", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 165, fx_2_address_values }, - { "AddressV", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 166, fx_2_address_values }, - { "AddressW", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 167, fx_2_address_values }, - { "BorderColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 168 }, - { "MagFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 169, fx_2_filter_values }, - { "MinFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 170, fx_2_filter_values }, - { "MipFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 171, fx_2_filter_values }, - { "MipMapLodBias", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 172 }, - { "MaxMipLevel", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 173 }, - { "MaxAnisotropy", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 174 }, - { "SRGBTexture", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 175 }, - { "ElementIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 176 }, +static const struct fx_state fx_2_pass_states[] = +{ + { "ZEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 0, fx_2_zenable_values }, + { "FillMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 1, fx_2_fillmode_values }, + { "ShadeMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 2, fx_2_shademode_values }, + { "ZWriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 3 }, + { "AlphaTestEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 4 }, + { "LastPixel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 5 }, + { "SrcBlend", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 6, fx_2_blendmode_values }, + { "DestBlend", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 7, fx_2_blendmode_values }, + { "CullMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 8, fx_2_cullmode_values }, + { "ZFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9, fx_2_cmpfunc_values }, + { "AlphaRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 10 }, + { "AlphaFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11, fx_2_cmpfunc_values }, + { "DitherEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12 }, + { "AlphaBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13 }, + { "FogEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 14 }, + { "SpecularEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, + { "FogColor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 16 }, + { "FogTableMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 17, fx_2_fogmode_values }, + { "FogStart", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 18 }, + { "FogEnd", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 19 }, + { "FogDensity", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 20 }, + { "RangeFogEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 21 }, + { "StencilEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 22 }, + { "StencilFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, fx_2_stencilcaps_values }, + { "StencilZFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, fx_2_stencilcaps_values }, + { "StencilPass", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 25, fx_2_stencilcaps_values }, + { "StencilFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 26, fx_2_cmpfunc_values }, + { "StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 27 }, + { "StencilMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28 }, + { "StencilWriteMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29 }, + { "TextureFactor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30 }, + { "Wrap0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, fx_2_wrap_values }, + { "Wrap1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, fx_2_wrap_values }, + { "Wrap2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, fx_2_wrap_values }, + { "Wrap3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, fx_2_wrap_values }, + { "Wrap4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, fx_2_wrap_values }, + { "Wrap5", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 36, fx_2_wrap_values }, + { "Wrap6", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 37, fx_2_wrap_values }, + { "Wrap7", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, fx_2_wrap_values }, + { "Wrap8", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, fx_2_wrap_values }, + { "Wrap9", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, fx_2_wrap_values }, + { "Wrap10", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, fx_2_wrap_values }, + { "Wrap11", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, fx_2_wrap_values }, + { "Wrap12", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, fx_2_wrap_values }, + { "Wrap13", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 44, fx_2_wrap_values }, + { "Wrap14", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, fx_2_wrap_values }, + { "Wrap15", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, fx_2_wrap_values }, + { "Clipping", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47 }, + { "Lighting", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48 }, + { "Ambient", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 49 }, + { "FogVertexMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50, fx_2_fogmode_values }, + { "ColorVertex", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51 }, + { "LocalViewer", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 52 }, + { "NormalizeNormals", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 53 }, + + { "DiffuseMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 54, fx_2_materialcolorsource_values }, + { "SpecularMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 55, fx_2_materialcolorsource_values }, + { "AmbientMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 56, fx_2_materialcolorsource_values }, + { "EmissiveMaterialSource", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 57, fx_2_materialcolorsource_values }, + + { "VertexBlend", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 58, fx_2_vertexblend_values }, + { "ClipPlaneEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 59, fx_2_clipplane_values }, + { "PointSize", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 60 }, + { "PointSize_Min", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 61 }, + { "PointSize_Max", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 62 }, + { "PointSpriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 63 }, + { "PointScaleEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 64 }, + { "PointScale_A", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 65 }, + { "PointScale_B", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 66 }, + { "PointScale_C", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 67 }, + + { "MultiSampleAntialias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 68 }, + { "MultiSampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 69 }, + { "PatchEdgeStyle", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 70, fx_2_patchedgestyle_values }, + { "DebugMonitorToken", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 71 }, + { "IndexedVertexBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 72 }, + { "ColorWriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 73, fx_2_colorwriteenable_values }, + { "TweenFactor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 74 }, + { "BlendOp", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 75, fx_2_blendop_values }, + { "PositionDegree", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 76, fx_2_degree_values }, + { "NormalDegree", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 77, fx_2_degree_values }, + { "ScissorTestEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 78 }, + { "SlopeScaleDepthBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 79 }, + + { "AntialiasedLineEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 80 }, + { "MinTessellationLevel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 81 }, + { "MaxTessellationLevel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 82 }, + { "AdaptiveTess_X", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 83 }, + { "AdaptiveTess_Y", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 84 }, + { "AdaptiveTess_Z", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 85 }, + { "AdaptiveTess_W", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 86 }, + { "EnableAdaptiveTessellation",HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 87 }, + { "TwoSidedStencilMode", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 88 }, + { "StencilFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 89, fx_2_stencilcaps_values }, + { "StencilZFail", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 90, fx_2_stencilcaps_values }, + { "StencilPass", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 91, fx_2_stencilcaps_values }, + { "StencilFunc", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 92, fx_2_cmpfunc_values }, + + { "ColorWriteEnable1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 93, fx_2_colorwriteenable_values }, + { "ColorWriteEnable2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 94, fx_2_colorwriteenable_values }, + { "ColorWriteEnable3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 95, fx_2_colorwriteenable_values }, + { "BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 96 }, + { "SRGBWriteEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 97 }, + { "DepthBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 98 }, + { "SeparateAlphaBlendEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 99 }, + { "SrcBlendAlpha", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 100, fx_2_blendmode_values }, + { "DestBlendAlpha", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 101, fx_2_blendmode_values }, + { "BlendOpAlpha", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 102, fx_2_blendmode_values }, + + { "ColorOp", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 103, fx_2_textureop_values }, + { "ColorArg0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 104, fx_2_colorarg_values }, + { "ColorArg1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 105, fx_2_colorarg_values }, + { "ColorArg2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 106, fx_2_colorarg_values }, + { "AlphaOp", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 107, fx_2_textureop_values }, + { "AlphaArg0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 108, fx_2_colorarg_values }, + { "AlphaArg1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 109, fx_2_colorarg_values }, + { "AlphaArg2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 110, fx_2_colorarg_values }, + { "ResultArg", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 111, fx_2_colorarg_values }, + { "BumpEnvMat00", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 112 }, + { "BumpEnvMat01", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 113 }, + { "BumpEnvMat10", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 114 }, + { "BumpEnvMat11", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 115 }, + { "TexCoordIndex", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 116 }, + { "BumpEnvLScale", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 117 }, + { "BumpEnvLOffset", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 8, 118 }, + { "TextureTransformFlags", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 119, fx_2_texturetransform_values }, + { "Constant", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 120 }, + { "PatchSegments", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 121 }, + { "FVF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 122 }, + + { "ProjectionTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 123 }, + { "ViewTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 1, 124 }, + { "WorldTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 256, 125 }, + { "TextureTransform", HLSL_CLASS_PASS, HLSL_CLASS_MATRIX, FX_FLOAT, 4, 8, 126 }, + + { "MaterialDiffuse", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 127 }, + { "MaterialAmbient", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 128 }, + { "MaterialSpecular", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 129 }, + { "MaterialEmissive", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 130 }, + { "MaterialPower", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 131 }, + + { "LightType", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 132, fx_2_lighttype_values }, + { "LightDiffuse", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 133 }, + { "LightSpecular", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 134 }, + { "LightAmbient", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, ~0u, 135 }, + { "LightPosition", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 136 }, + { "LightDirection", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 3, ~0u, 137 }, + { "LightRange", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 138 }, + { "LightFalloff", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 139 }, + { "LightAttenuation0", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 140 }, + { "LightAttenuation1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 141 }, + { "LightAttenuation2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 142 }, + { "LightTheta", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 143 }, + { "LightPhi", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 144 }, + { "LightEnable", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 145 }, + + { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 146 }, + { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 147 }, + + { "VertexShaderConstantF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 148 }, + { "VertexShaderConstantB", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 149 }, + { "VertexShaderConstantI", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 150 }, + { "VertexShaderConstant", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 151 }, + { "VertexShaderConstant1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 152 }, + { "VertexShaderConstant2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 153 }, + { "VertexShaderConstant3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 154 }, + { "VertexShaderConstant4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 155 }, + + { "PixelShaderConstantF", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 156 }, + { "PixelShaderConstantB", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BOOL, 1, ~0u, 157 }, + { "PixelShaderConstantI", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, ~0u, 158 }, + { "PixelShaderConstant", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 159 }, + { "PixelShaderConstant1", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 160 }, + { "PixelShaderConstant2", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 161 }, + { "PixelShaderConstant3", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 162 }, + { "PixelShaderConstant4", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_FLOAT, 1, ~0u, 163 }, + + { "Texture", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 261, 164 }, + { "AddressU", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 165, fx_2_address_values }, + { "AddressV", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 166, fx_2_address_values }, + { "AddressW", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 167, fx_2_address_values }, + { "BorderColor", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 168 }, + { "MagFilter", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 169, fx_2_filter_values }, + { "MinFilter", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 170, fx_2_filter_values }, + { "MipFilter", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 171, fx_2_filter_values }, + { "MipMapLodBias", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 172 }, + { "MaxMipLevel", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 173 }, + { "MaxAnisotropy", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 174 }, + { "SRGBTexture", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 175 }, + { "ElementIndex", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 261, 176 }, }; -static const struct fx_2_state fx_2_sampler_states[] = -{ - { "Texture", HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 }, - { "AddressU", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 165, fx_2_address_values }, - { "AddressV", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 166, fx_2_address_values }, - { "AddressW", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 167, fx_2_address_values }, - { "BorderColor", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 168 }, - { "MagFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 169, fx_2_filter_values }, - { "MinFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 170, fx_2_filter_values }, - { "MipFilter", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 171, fx_2_filter_values }, - { "MipMapLodBias", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 172 }, - { "MaxMipLevel", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 173 }, - { "MaxAnisotropy", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 174 }, - { "SRGBTexture", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 175 }, - { "ElementIndex", HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 176 }, +static const struct fx_state fx_2_sampler_states[] = +{ + { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 164 }, + { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 165, fx_2_address_values }, + { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 166, fx_2_address_values }, + { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 167, fx_2_address_values }, + { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 168 }, + { "MagFilter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 169, fx_2_filter_values }, + { "MinFilter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 170, fx_2_filter_values }, + { "MipFilter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 171, fx_2_filter_values }, + { "MipMapLodBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 172 }, + { "MaxMipLevel", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 173 }, + { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 174 }, + { "SRGBTexture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 175 }, + { "ElementIndex", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 176 }, }; static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) @@ -1979,12 +1994,21 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte put_u32(buffer, value_offset); } +static void write_fx_2_state_assignment(const struct hlsl_ir_var *var, + struct hlsl_state_block_entry *entry, struct fx_write_context *fx) +{ + struct hlsl_ctx *ctx = fx->ctx; + + hlsl_fixme(ctx, &var->loc, "Writing fx_2_0 state assignments is not implemented."); +} + static const struct fx_write_context_ops fx_2_ops = { .write_string = write_fx_2_string, .write_technique = write_fx_2_technique, .write_pass = write_fx_2_pass, .write_annotation = write_fx_2_annotation, + .write_state_assignment = write_fx_2_state_assignment, }; static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) @@ -2047,12 +2071,16 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) return fx_write_context_cleanup(&fx); } +static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, + struct hlsl_state_block_entry *entry, struct fx_write_context *fx); + static const struct fx_write_context_ops fx_4_ops = { .write_string = write_fx_4_string, .write_technique = write_fx_4_technique, .write_pass = write_fx_4_pass, .write_annotation = write_fx_4_annotation, + .write_state_assignment = write_fx_4_state_assignment, .are_child_effects_supported = true, }; @@ -2660,18 +2688,7 @@ static const struct rhs_named_value null_values[] = { NULL } }; -static const struct fx_4_state -{ - const char *name; - enum hlsl_type_class container; - enum hlsl_type_class class; - enum state_property_component_type type; - unsigned int dimx; - unsigned int array_size; - int id; - const struct rhs_named_value *values; -} -fx_4_states[] = +static const struct fx_state fx_4_states[] = { { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, @@ -2739,7 +2756,7 @@ fx_4_states[] = { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, }; -static const struct fx_4_state fx_5_blend_states[] = +static const struct fx_state fx_5_blend_states[] = { { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, @@ -2752,45 +2769,61 @@ static const struct fx_4_state fx_5_blend_states[] = { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, }; -struct fx_4_state_table +struct fx_state_table { - const struct fx_4_state *ptr; + const struct fx_state *ptr; unsigned int count; }; -static struct fx_4_state_table fx_4_get_state_table(enum hlsl_type_class type_class, +static struct fx_state_table fx_get_state_table(enum hlsl_type_class type_class, unsigned int major, unsigned int minor) { - struct fx_4_state_table table; + struct fx_state_table table; - if (type_class == HLSL_CLASS_BLEND_STATE && (major == 5 || (major == 4 && minor == 1))) + if (major == 2) { - table.ptr = fx_5_blend_states; - table.count = ARRAY_SIZE(fx_5_blend_states); + if (type_class == HLSL_CLASS_PASS) + { + table.ptr = fx_2_pass_states; + table.count = ARRAY_SIZE(fx_2_pass_states); + } + else + { + table.ptr = fx_2_sampler_states; + table.count = ARRAY_SIZE(fx_2_sampler_states); + } } else { - table.ptr = fx_4_states; - table.count = ARRAY_SIZE(fx_4_states); + if (type_class == HLSL_CLASS_BLEND_STATE && (major == 5 || (major == 4 && minor == 1))) + { + table.ptr = fx_5_blend_states; + table.count = ARRAY_SIZE(fx_5_blend_states); + } + else + { + table.ptr = fx_4_states; + table.count = ARRAY_SIZE(fx_4_states); + } } return table; } -static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, +static void resolve_fx_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, struct fx_write_context *fx) { const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); struct replace_state_context replace_context; - const struct fx_4_state *state = NULL; + const struct fx_state *state = NULL; struct hlsl_type *state_type = NULL; struct hlsl_ctx *ctx = fx->ctx; enum hlsl_base_type base_type; - struct fx_4_state_table table; + struct fx_state_table table; struct hlsl_ir_node *node; unsigned int i; - table = fx_4_get_state_table(type->class, ctx->profile->major_version, ctx->profile->minor_version); + table = fx_get_state_table(type->class, ctx->profile->major_version, ctx->profile->minor_version); for (i = 0; i < table.count; ++i) { @@ -3076,21 +3109,34 @@ static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct h return decompose_fx_4_state_block_expand_array(var, block, entry_index, fx); } -static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, - uint32_t count_offset, struct fx_write_context *fx) +static void fx_4_decompose_state_blocks(struct hlsl_ir_var *var, struct fx_write_context *fx) { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; + unsigned int block_count = hlsl_get_multiarray_size(var->data_type); struct hlsl_state_block *block; - uint32_t i, count = 0; - if (var->state_blocks) + if (!var->state_blocks) + return; + + for (unsigned int i = 0; i < block_count; ++i) { - block = var->state_blocks[block_index]; + block = var->state_blocks[i]; - for (i = 0; i < block->count;) + for (unsigned int j = 0; j < block->count;) { - i += decompose_fx_4_state_block(var, block, i, fx); + j += decompose_fx_4_state_block(var, block, j, fx); } + } +} + +static uint32_t write_state_block(struct hlsl_ir_var *var, unsigned int block_index, + struct fx_write_context *fx) +{ + struct hlsl_state_block *block; + uint32_t i, count = 0; + + if (var->state_blocks) + { + block = var->state_blocks[block_index]; for (i = 0; i < block->count; ++i) { @@ -3101,27 +3147,29 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i continue; /* Resolve special constant names and property names. */ - resolve_fx_4_state_block_values(var, entry, fx); + resolve_fx_state_block_values(var, entry, fx); - write_fx_4_state_assignment(var, entry, fx); + write_state_assignment(var, entry, fx); ++count; } } - set_u32(buffer, count_offset, count); + return count; } static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) { uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t count_offset; + uint32_t count_offset, count; + + fx_4_decompose_state_blocks(var, fx); for (i = 0; i < elements_count; ++i) { count_offset = put_u32(buffer, 0); - - write_fx_4_state_block(var, i, count_offset, fx); + count = write_state_block(var, i, fx); + set_u32(buffer, count_offset, count); } } @@ -3977,17 +4025,13 @@ static void fx_parse_fx_2_annotations(struct fx_parser *parser, uint32_t count) vkd3d_string_buffer_printf(&parser->buffer, ">"); } -static const struct fx_2_state *fx_2_get_state_by_id(enum hlsl_type_class container, uint32_t id) +static const struct fx_state *fx_2_get_state_by_id(enum hlsl_type_class container, uint32_t id) { - const struct fx_2_state *table; - unsigned int count; - - count = container == HLSL_CLASS_PASS ? ARRAY_SIZE(fx_2_pass_states) : ARRAY_SIZE(fx_2_sampler_states); - table = container == HLSL_CLASS_PASS ? fx_2_pass_states : fx_2_sampler_states; + struct fx_state_table table = fx_get_state_table(container, 2, 0); /* State identifiers are sequential, no gaps */ - if (id >= table[0].id && id <= table[count - 1].id) - return &table[id - table[0].id]; + if (id >= table.ptr[0].id && id <= table.ptr[table.count - 1].id) + return &table.ptr[id - table.ptr[0].id]; return NULL; } @@ -3996,7 +4040,7 @@ static void fx_parse_fx_2_assignment(struct fx_parser *parser, enum hlsl_type_cl const struct fx_assignment *entry) { const struct rhs_named_value *named_value = NULL; - const struct fx_2_state *state; + const struct fx_state *state; if ((state = fx_2_get_state_by_id(container, entry->id))) { @@ -4700,7 +4744,7 @@ static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) static int fx_4_state_id_compare(const void *a, const void *b) { - const struct fx_4_state *state = b; + const struct fx_state *state = b; int id = *(int *)a; return id - state->id; @@ -5186,12 +5230,12 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 }; const struct rhs_named_value *named_value; struct fx_5_shader shader = { 0 }; - struct fx_4_state_table table; + struct fx_state_table table; unsigned int shader_type = 0; uint32_t i, j, comp_count; - struct fx_4_state *state; + struct fx_state *state; - table = fx_4_get_state_table(type_class, parser->version.major, parser->version.minor); + table = fx_get_state_table(type_class, parser->version.major, parser->version.minor); for (i = 0; i < count; ++i) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 2b88a04a120..73cd4da906a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -3531,21 +3531,7 @@ static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_der const char *debug_hlsl_writemask(unsigned int writemask) { - static const char components[] = {'x', 'y', 'z', 'w'}; - char string[5]; - unsigned int i = 0, pos = 0; - - VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); - - while (writemask) - { - if (writemask & 1) - string[pos++] = components[i]; - writemask >>= 1; - i++; - } - string[pos] = '\0'; - return vkd3d_dbg_sprintf(".%s", string); + return debug_vsir_writemask(writemask); } const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index bb37f0be6cf..369181cada8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -59,8 +59,7 @@ static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) { - *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); - *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); + vsir_swizzle_set_component(swizzle, idx, component); } enum hlsl_type_class @@ -309,6 +308,8 @@ struct hlsl_reg unsigned int writemask; /* Whether the register has been allocated. */ bool allocated; + /* Currently only used for numeric registers. */ + enum vkd3d_shader_register_type type; }; /* Types of instruction nodes for the IR. @@ -1187,6 +1188,8 @@ struct hlsl_ctx } constant_defs; /* 'c' registers where the constants expected by SM2 sincos are stored. */ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; + /* Number of allocated SSA IDs, used in translation to vsir. */ + unsigned int ssa_count; /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index d4e29e16b7c..04bb2d98b26 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -5656,6 +5656,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a unsigned int writemask = hlsl_combine_writemasks(available_writemask, vkd3d_write_mask_from_component_count(reg_size)); + ret.type = VKD3DSPR_TEMP; ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); @@ -5666,6 +5667,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a } } + ret.type = VKD3DSPR_TEMP; ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, @@ -5692,6 +5694,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); + ret.type = VKD3DSPR_TEMP; ret.id = reg_idx; ret.allocation_size = 1; ret.writemask = writemask; @@ -5737,6 +5740,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); + ret.type = VKD3DSPR_TEMP; ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; ret.allocated = true; @@ -5757,20 +5761,30 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); } -static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +static const char *debug_register(struct hlsl_reg reg, const struct hlsl_type *type) { static const char writemask_offset[] = {'w','x','y','z'}; unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + const char *class = "r"; + + if (reg.type == VKD3DSPR_CONST) + class = "c"; + else if (reg.type == VKD3DSPR_INPUT) + class = "v"; + else if (reg.type == VKD3DSPR_OUTPUT) + class = "o"; + else if (reg.type == VKD3DSPR_SSA) + class = "sr"; if (reg_size > 4 && !hlsl_type_is_patch_array(type)) { if (reg_size & 3) - return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, reg.id + (reg_size / 4), + return vkd3d_dbg_sprintf("%s%u-%s%u.%c", class, reg.id, class, reg.id + (reg_size / 4), writemask_offset[reg_size & 3]); - return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, reg.id + (reg_size / 4) - 1); + return vkd3d_dbg_sprintf("%s%u-%s%u", class, reg.id, class, reg.id + (reg_size / 4) - 1); } - return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); + return vkd3d_dbg_sprintf("%s%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); } static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -5910,11 +5924,12 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct register_allocator *allocator) { unsigned int reg_writemask = 0, dst_writemask = 0; + bool is_per_component = false; if (instr->reg.allocated || !instr->last_read) return; - if (instr->type == HLSL_IR_EXPR) + if (instr->type == HLSL_IR_EXPR && ctx->profile->major_version < 4) { switch (hlsl_ir_expr(instr)->op) { @@ -5928,20 +5943,42 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_1; break; + case HLSL_OP1_EXP2: + case HLSL_OP1_LOG2: + case HLSL_OP1_RCP: + case HLSL_OP1_RSQ: + /* These ops can only be written one component at a time in sm1, + * so it'll take more than one instruction to fill the variable + * and thus we can't use an SSA. + * FIXME: We should probably handle this by splitting at the vsir + * level instead. */ + is_per_component = true; + break; + default: break; } } + VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); + if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, - instr->last_read, reg_writemask, dst_writemask, 0, false); - else + instr->reg = allocate_register_with_masks(ctx, allocator, + instr->index, instr->last_read, reg_writemask, dst_writemask, 0, false); + else if (is_per_component) instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); + else + { + instr->reg.writemask = vkd3d_write_mask_from_component_count(instr->data_type->e.numeric.dimx); + instr->reg.allocation_size = 1; + instr->reg.allocated = true; + instr->reg.type = VKD3DSPR_SSA; + instr->reg.id = ctx->ssa_count++; + } TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, - debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); + debug_register(instr->reg, instr->data_type), instr->index, instr->last_read); } static void allocate_variable_temp_register(struct hlsl_ctx *ctx, @@ -5966,8 +6003,8 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, var->first_write, var->last_read, var->data_type); - TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', - var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); + TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, + debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); } } } @@ -6051,6 +6088,7 @@ static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int cou if ((reg->allocated_mask & writemask) == writemask && !memcmp(f, ®->value.f[j], count * sizeof(float))) { + ret->type = VKD3DSPR_CONST; ret->id = reg->index; ret->allocation_size = 1; ret->writemask = writemask; @@ -6144,12 +6182,13 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, if (find_constant(ctx, f, type->e.numeric.dimx, &constant->reg)) { TRACE("Reusing already allocated constant %s for @%u.\n", - debug_register('c', constant->reg, type), instr->index); + debug_register(constant->reg, type), instr->index); break; } constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + constant->reg.type = VKD3DSPR_CONST; + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register(constant->reg, type)); for (unsigned int x = 0, i = 0; x < 4; ++x) { @@ -6246,14 +6285,16 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); + ctx->d3dsincosconst1.type = VKD3DSPR_CONST; + TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register(ctx->d3dsincosconst1, type)); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f, &instr->loc); ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); + ctx->d3dsincosconst2.type = VKD3DSPR_CONST; + TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register(ctx->d3dsincosconst2, type)); record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc); record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f, &instr->loc); @@ -6301,12 +6342,13 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); } + var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; var->regs[HLSL_REGSET_NUMERIC].allocation_size = reg_size / 4; var->regs[HLSL_REGSET_NUMERIC].writemask = VKD3DSP_WRITEMASK_ALL; var->regs[HLSL_REGSET_NUMERIC].allocated = true; TRACE("Allocated reserved %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } @@ -6322,8 +6364,9 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); + var->regs[HLSL_REGSET_NUMERIC].type = VKD3DSPR_CONST; TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } @@ -6519,9 +6562,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, reg_size, component_count, mode, var->force_align, vip_allocation); + var->regs[HLSL_REGSET_NUMERIC].type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', - var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); + TRACE("Allocated %s to %s (mode %d).\n", var->name, + debug_register(var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); } } @@ -7733,8 +7777,6 @@ static void validate_and_record_stream_outputs(struct hlsl_ctx *ctx) reported_invalid_index = true; } } - - /* TODO: check that maxvertexcount * outputdatasize <= 1024. */ } static void validate_max_output_size(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, @@ -8376,7 +8418,7 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } else { - vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + vsir_register_init(&src->reg, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); src->reg.idx[0].offset = instr->reg.id; src->reg.dimension = VSIR_DIMENSION_VEC4; src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); @@ -8657,7 +8699,7 @@ static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) { VKD3D_ASSERT(instr->reg.allocated); - vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + vsir_dst_param_init(dst, instr->reg.type, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); dst->reg.idx[0].offset = instr->reg.id; dst->reg.dimension = VSIR_DIMENSION_VEC4; dst->write_mask = instr->reg.writemask; @@ -8767,13 +8809,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx return; dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + vsir_register_init(&dst_param->reg, instr->reg.type, VKD3D_DATA_FLOAT, 1); dst_param->reg.idx[0].offset = instr->reg.id; dst_param->reg.dimension = VSIR_DIMENSION_VEC4; dst_param->write_mask = 1u << i; src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + vsir_register_init(&src_param->reg, operand->reg.type, VKD3D_DATA_FLOAT, 1); src_param->reg.idx[0].offset = operand->reg.id; src_param->reg.dimension = VSIR_DIMENSION_VEC4; c = vsir_swizzle_get_component(src_swizzle, i); @@ -9372,7 +9414,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, src_param = &ins->src[0]; VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); + vsir_register_init(&src_param->reg, val->reg.type, vsir_data_type_from_hlsl_instruction(ctx, val), 1); src_param->reg.idx[0].offset = val->reg.id; src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->swizzle = swizzle; @@ -9528,6 +9570,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl return; } + program->ssa_count = 0; program->temp_count = allocate_temp_registers(ctx, entry_func); if (ctx->result) return; @@ -9540,6 +9583,8 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl list_move_head(&entry_func->body.instrs, &block.instrs); sm1_generate_vsir_block(ctx, &entry_func->body, program); + + program->ssa_count = ctx->ssa_count; } D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) @@ -12323,6 +12368,8 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl } } + program->ssa_count = 0; + if (version.type == VKD3D_SHADER_TYPE_HULL) generate_vsir_add_program_instruction(ctx, program, &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); @@ -12336,6 +12383,8 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl generate_vsir_scan_required_features(ctx, program); generate_vsir_scan_global_flags(ctx, program, func); + + program->ssa_count = ctx->ssa_count; } /* For some reason, for matrices, values from default value initializers end @@ -13523,6 +13572,19 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body); + if (entry_func->return_var) + { + if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "Geometry shaders cannot return values."); + else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT + && !entry_func->return_var->semantic.name) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + + append_output_var_copy(ctx, entry_func, entry_func->return_var); + } + for (i = 0; i < entry_func->parameters.count; ++i) { var = entry_func->parameters.vars[i]; @@ -13627,18 +13689,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, } } } + if (entry_func->return_var) { - if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "Geometry shaders cannot return values."); - else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT - && !entry_func->return_var->semantic.name) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - - append_output_var_copy(ctx, entry_func, entry_func->return_var); - if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func) ctx->output_control_point_type = entry_func->return_var->data_type; } @@ -13882,6 +13935,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry return ctx->result; } + vsir_program_trace(&program); + result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context); vsir_program_cleanup(&program); vkd3d_shader_free_shader_code(&ctab); @@ -13907,6 +13962,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry return ctx->result; } + vsir_program_trace(&program); + result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context); vsir_program_cleanup(&program); vkd3d_shader_free_shader_code(&rdef); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 1429c3a8778..8489d0b5ecb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -503,6 +503,53 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, return false; } +const char *debug_vsir_writemask(unsigned int writemask) +{ + static const char components[] = {'x', 'y', 'z', 'w'}; + char string[5]; + unsigned int i = 0, pos = 0; + + VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); + + while (writemask) + { + if (writemask & 1) + string[pos++] = components[i]; + writemask >>= 1; + i++; + } + string[pos] = '\0'; + return vkd3d_dbg_sprintf(".%s", string); +} + +static unsigned int vsir_combine_write_masks(unsigned int first, unsigned int second) +{ + unsigned int ret = 0, j = 0; + + for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (first & (1u << i)) + { + if (second & (1u << j++)) + ret |= (1u << i); + } + } + + return ret; +} + +static uint32_t vsir_combine_swizzles(uint32_t first, uint32_t second) +{ + uint32_t ret = 0; + + for (unsigned int i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + unsigned int s = vsir_swizzle_get_component(second, i); + vsir_swizzle_set_component(&ret, i, vsir_swizzle_get_component(first, s)); + } + return ret; +} + void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int idx_count) { @@ -7725,6 +7772,892 @@ static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *pro return VKD3D_OK; } + +/* Distinguishes between instruction sources which are masked, where the used + * components of the source are determined by the write mask, and sources which + * are not masked, where the used components are pre-defined. + * + * E.g. "add r0.yz, r1.xyzw, r2.xyzw" uses the .yz components of r1 and r2, and + * therefore those sources are considered "masked", but + * "dp3 r0.y, r1.xyzw, r2.xyzw" uses the .xyz components. */ +static bool vsir_src_is_masked(enum vkd3d_shader_opcode opcode, unsigned int src_idx) +{ + switch (opcode) + { + case VKD3DSIH_ABS: + case VKD3DSIH_ACOS: + case VKD3DSIH_ADD: + case VKD3DSIH_AND: + case VKD3DSIH_ASIN: + case VKD3DSIH_ATAN: + case VKD3DSIH_BFI: + case VKD3DSIH_BFREV: + case VKD3DSIH_CMP: + case VKD3DSIH_CND: + case VKD3DSIH_COS: + case VKD3DSIH_COUNTBITS: + case VKD3DSIH_DADD: /* NB: These are masked, but the mask is double-sized. */ + case VKD3DSIH_DDIV: + case VKD3DSIH_DFMA: + case VKD3DSIH_DIV: + case VKD3DSIH_DMAX: + case VKD3DSIH_DMIN: + case VKD3DSIH_DMOV: + case VKD3DSIH_DMOVC: + case VKD3DSIH_DMUL: + case VKD3DSIH_DRCP: + case VKD3DSIH_DSX: + case VKD3DSIH_DSX_COARSE: + case VKD3DSIH_DSX_FINE: + case VKD3DSIH_DSY: + case VKD3DSIH_DSY_COARSE: + case VKD3DSIH_DSY_FINE: + case VKD3DSIH_EQO: + case VKD3DSIH_EQU: + case VKD3DSIH_EXP: + case VKD3DSIH_EXPP: + case VKD3DSIH_F16TOF32: + case VKD3DSIH_F32TOF16: + case VKD3DSIH_FIRSTBIT_HI: + case VKD3DSIH_FIRSTBIT_LO: + case VKD3DSIH_FIRSTBIT_SHI: + case VKD3DSIH_FRC: + case VKD3DSIH_FREM: + case VKD3DSIH_FTOD: + case VKD3DSIH_FTOI: + case VKD3DSIH_FTOU: + case VKD3DSIH_GEO: + case VKD3DSIH_GEU: + case VKD3DSIH_HCOS: + case VKD3DSIH_HSIN: + case VKD3DSIH_HTAN: + case VKD3DSIH_IADD: + case VKD3DSIH_IBFE: + case VKD3DSIH_IDIV: + case VKD3DSIH_IEQ: + case VKD3DSIH_IGE: + case VKD3DSIH_ILT: + case VKD3DSIH_IMAD: + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: + case VKD3DSIH_IMUL: + case VKD3DSIH_INE: + case VKD3DSIH_INEG: + case VKD3DSIH_ISFINITE: + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_ISINF: + case VKD3DSIH_ISNAN: + case VKD3DSIH_ITOD: + case VKD3DSIH_ITOF: + case VKD3DSIH_ITOI: + case VKD3DSIH_LOG: + case VKD3DSIH_LOGP: + case VKD3DSIH_LRP: + case VKD3DSIH_LTO: + case VKD3DSIH_LTU: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_MOV: + case VKD3DSIH_MOVA: + case VKD3DSIH_MOVC: + case VKD3DSIH_MSAD: /* FIXME: Is this correct? */ + case VKD3DSIH_MUL: + case VKD3DSIH_NEO: + case VKD3DSIH_NEU: + case VKD3DSIH_NOT: + case VKD3DSIH_OR: + case VKD3DSIH_ORD: + case VKD3DSIH_PHI: + case VKD3DSIH_POW: + case VKD3DSIH_QUAD_READ_ACROSS_D: + case VKD3DSIH_QUAD_READ_ACROSS_X: + case VKD3DSIH_QUAD_READ_ACROSS_Y: + case VKD3DSIH_RCP: + case VKD3DSIH_ROUND_NE: + case VKD3DSIH_ROUND_NI: + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: + case VKD3DSIH_SETP: + case VKD3DSIH_SGE: + case VKD3DSIH_SGN: + case VKD3DSIH_SIN: + case VKD3DSIH_SINCOS: /* FIXME: Only for sm4. */ + case VKD3DSIH_SLT: + case VKD3DSIH_SQRT: + case VKD3DSIH_SUB: + case VKD3DSIH_SWAPC: + case VKD3DSIH_TAN: + case VKD3DSIH_UBFE: + case VKD3DSIH_UDIV: + case VKD3DSIH_UGE: + case VKD3DSIH_ULT: + case VKD3DSIH_UMAX: + case VKD3DSIH_UMIN: + case VKD3DSIH_UMUL: + case VKD3DSIH_UNO: + case VKD3DSIH_USHR: + case VKD3DSIH_UTOD: + case VKD3DSIH_UTOF: + case VKD3DSIH_UTOU: + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + case VKD3DSIH_WAVE_ACTIVE_BIT_AND: + case VKD3DSIH_WAVE_ACTIVE_BIT_OR: + case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: + case VKD3DSIH_WAVE_ALL_TRUE: + case VKD3DSIH_WAVE_ANY_TRUE: + case VKD3DSIH_WAVE_OP_ADD: + case VKD3DSIH_WAVE_OP_IMAX: + case VKD3DSIH_WAVE_OP_IMIN: + case VKD3DSIH_WAVE_OP_MAX: + case VKD3DSIH_WAVE_OP_MIN: + case VKD3DSIH_WAVE_OP_MUL: + case VKD3DSIH_WAVE_OP_UMAX: + case VKD3DSIH_WAVE_OP_UMIN: + case VKD3DSIH_WAVE_READ_LANE_FIRST: + case VKD3DSIH_XOR: + return true; + + /* Atomics can't have a writemask. */ + case VKD3DSIH_ATOMIC_AND: + case VKD3DSIH_ATOMIC_CMP_STORE: + case VKD3DSIH_ATOMIC_IADD: + case VKD3DSIH_ATOMIC_IMAX: + case VKD3DSIH_ATOMIC_IMIN: + case VKD3DSIH_ATOMIC_OR: + case VKD3DSIH_ATOMIC_UMAX: + case VKD3DSIH_ATOMIC_UMIN: + case VKD3DSIH_ATOMIC_XOR: + case VKD3DSIH_BEM: + case VKD3DSIH_BRANCH: + case VKD3DSIH_BREAK: + case VKD3DSIH_BREAKC: + case VKD3DSIH_BREAKP: + case VKD3DSIH_BUFINFO: + case VKD3DSIH_CALL: + case VKD3DSIH_CALLNZ: + case VKD3DSIH_CASE: + case VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED: /* FIXME: Is this correct? */ + case VKD3DSIH_CONTINUE: + case VKD3DSIH_CONTINUEP: + case VKD3DSIH_CRS: + case VKD3DSIH_CUT: + case VKD3DSIH_CUT_STREAM: + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_FUNCTION_BODY: + case VKD3DSIH_DCL_FUNCTION_TABLE: + case VKD3DSIH_DCL_GLOBAL_FLAGS: + case VKD3DSIH_DCL_GS_INSTANCES: + case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: + case VKD3DSIH_DCL_INDEXABLE_TEMP: + case VKD3DSIH_DCL_INDEX_RANGE: + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INTERFACE: + case VKD3DSIH_DCL_OUTPUT: + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + case VKD3DSIH_DCL_OUTPUT_SGV: + case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_STREAM: + case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + case VKD3DSIH_DCL_TGSM_RAW: + case VKD3DSIH_DCL_TGSM_STRUCTURED: + case VKD3DSIH_DCL_THREAD_GROUP: + case VKD3DSIH_DCL_UAV_RAW: + case VKD3DSIH_DCL_UAV_STRUCTURED: + case VKD3DSIH_DCL_UAV_TYPED: + case VKD3DSIH_DCL_VERTICES_OUT: + case VKD3DSIH_DEF: + case VKD3DSIH_DEFAULT: + case VKD3DSIH_DEFB: + case VKD3DSIH_DEFI: + case VKD3DSIH_DEQO: + case VKD3DSIH_DGEO: + case VKD3DSIH_DISCARD: + case VKD3DSIH_DLT: + case VKD3DSIH_DNE: + case VKD3DSIH_DP2: + case VKD3DSIH_DP2ADD: + case VKD3DSIH_DP3: + case VKD3DSIH_DP4: + case VKD3DSIH_DST: + case VKD3DSIH_DTOF: + case VKD3DSIH_DTOI: + case VKD3DSIH_DTOU: + case VKD3DSIH_ELSE: + case VKD3DSIH_EMIT: + case VKD3DSIH_EMIT_STREAM: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDREP: + case VKD3DSIH_ENDSWITCH: + case VKD3DSIH_FCALL: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + case VKD3DSIH_IF: + case VKD3DSIH_IFC: + /* It's unclear if any mapping is done for the source value. + * Does it require replicate swizzle? */ + case VKD3DSIH_IMM_ATOMIC_ALLOC: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_CONSUME: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_XOR: + case VKD3DSIH_LABEL: + case VKD3DSIH_LOOP: + case VKD3DSIH_LIT: + case VKD3DSIH_M3x2: + case VKD3DSIH_M3x3: + case VKD3DSIH_M3x4: + case VKD3DSIH_M4x3: + case VKD3DSIH_M4x4: + case VKD3DSIH_NOP: + /* NRM writemask must be .xyz or .xyzw. */ + case VKD3DSIH_NRM: + case VKD3DSIH_PHASE: + case VKD3DSIH_REP: + case VKD3DSIH_RET: + case VKD3DSIH_RETP: + /* Store instructions always require a trivial writemask. */ + case VKD3DSIH_STORE_RAW: + case VKD3DSIH_STORE_STRUCTURED: + case VKD3DSIH_STORE_UAV_TYPED: + case VKD3DSIH_SWITCH: + case VKD3DSIH_SWITCH_MONOLITHIC: + case VKD3DSIH_SYNC: + case VKD3DSIH_TEX: + case VKD3DSIH_TEXBEM: + case VKD3DSIH_TEXBEML: + case VKD3DSIH_TEXCOORD: + case VKD3DSIH_TEXCRD: + case VKD3DSIH_TEXDEPTH: + case VKD3DSIH_TEXDP3: + case VKD3DSIH_TEXDP3TEX: + case VKD3DSIH_TEXKILL: + case VKD3DSIH_TEXLD: + case VKD3DSIH_TEXLDD: + case VKD3DSIH_TEXLDL: + case VKD3DSIH_TEXM3x2DEPTH: + case VKD3DSIH_TEXM3x2PAD: + case VKD3DSIH_TEXM3x2TEX: + case VKD3DSIH_TEXM3x3: + case VKD3DSIH_TEXM3x3DIFF: + case VKD3DSIH_TEXM3x3PAD: + case VKD3DSIH_TEXM3x3SPEC: + case VKD3DSIH_TEXM3x3TEX: + case VKD3DSIH_TEXM3x3VSPEC: + case VKD3DSIH_TEXREG2AR: + case VKD3DSIH_TEXREG2GB: + case VKD3DSIH_TEXREG2RGB: + case VKD3DSIH_WAVE_ACTIVE_BALLOT: + case VKD3DSIH_WAVE_ALL_BIT_COUNT: + case VKD3DSIH_WAVE_IS_FIRST_LANE: + case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: + return false; + + case VKD3DSIH_QUAD_READ_LANE_AT: + case VKD3DSIH_WAVE_READ_LANE_AT: + return (src_idx == 0); + + /* sm4 resource instructions are an odd case, since they're not actually + * per-component. However, the "swizzle" placed on the resource allows + * arbitrary destination writemasks to be used. + * + * This means that for the purposes of the "remapping" done by + * temp_allocator_set_dst(), we can basically treat those sources as + * "mapped", altering them when we reassign the destination writemask. */ + + /* FIXME: The documentation seems to say that these instructions behave + * this way, but is it correct? + * (It's silent about EVAL_*, but presumably they behave the same way.) */ + case VKD3DSIH_EVAL_CENTROID: + case VKD3DSIH_EVAL_SAMPLE_INDEX: + case VKD3DSIH_SAMPLE_INFO: + case VKD3DSIH_SAMPLE_POS: + return (src_idx == 0); + case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_C: + case VKD3DSIH_GATHER4_C_S: + case VKD3DSIH_GATHER4_S: + case VKD3DSIH_LD: + case VKD3DSIH_LD2DMS: + case VKD3DSIH_LD2DMS_S: + case VKD3DSIH_LD_RAW: + case VKD3DSIH_LD_RAW_S: + case VKD3DSIH_LD_S: + case VKD3DSIH_LD_UAV_TYPED: + case VKD3DSIH_LD_UAV_TYPED_S: + case VKD3DSIH_LOD: + case VKD3DSIH_RESINFO: + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_B_CL_S: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_CL_S: + case VKD3DSIH_SAMPLE_C_CL_S: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_C_LZ_S: + case VKD3DSIH_SAMPLE_GRAD: + case VKD3DSIH_SAMPLE_GRAD_CL_S: + case VKD3DSIH_SAMPLE_LOD: + case VKD3DSIH_SAMPLE_LOD_S: + return (src_idx == 1); + case VKD3DSIH_GATHER4_PO: + case VKD3DSIH_GATHER4_PO_C: + case VKD3DSIH_GATHER4_PO_C_S: + case VKD3DSIH_GATHER4_PO_S: + case VKD3DSIH_LD_STRUCTURED: + case VKD3DSIH_LD_STRUCTURED_S: + return (src_idx == 2); + + case VKD3DSIH_INVALID: + case VKD3DSIH_COUNT: + break; + } + + vkd3d_unreachable(); +} + +struct liveness_tracker +{ + struct liveness_tracker_reg + { + bool written; + bool fixed_mask; + uint8_t mask; + unsigned int first_write, last_access; + } *ssa_regs; +}; + +static void liveness_track_src(struct liveness_tracker *tracker, + struct vkd3d_shader_src_param *src, unsigned int index) +{ + for (unsigned int k = 0; k < src->reg.idx_count; ++k) + { + if (src->reg.idx[k].rel_addr) + liveness_track_src(tracker, src->reg.idx[k].rel_addr, index); + } + + if (src->reg.type == VKD3DSPR_SSA) + tracker->ssa_regs[src->reg.idx[0].offset].last_access = index; +} + +static void liveness_track_dst(struct liveness_tracker *tracker, struct vkd3d_shader_dst_param *dst, + unsigned int index, const struct vkd3d_shader_version *version, enum vkd3d_shader_opcode opcode) +{ + struct liveness_tracker_reg *reg; + + for (unsigned int k = 0; k < dst->reg.idx_count; ++k) + { + if (dst->reg.idx[k].rel_addr) + liveness_track_src(tracker, dst->reg.idx[k].rel_addr, index); + } + + if (dst->reg.type == VKD3DSPR_SSA) + reg = &tracker->ssa_regs[dst->reg.idx[0].offset]; + else + return; + + if (!reg->written) + reg->first_write = index; + reg->last_access = index; + reg->written = true; + reg->mask |= dst->write_mask; + + switch (opcode) + { + case VKD3DSIH_BEM: + case VKD3DSIH_CRS: + case VKD3DSIH_DST: + case VKD3DSIH_LIT: + case VKD3DSIH_M3x2: + case VKD3DSIH_M3x3: + case VKD3DSIH_M3x4: + case VKD3DSIH_M4x3: + case VKD3DSIH_M4x4: + case VKD3DSIH_NRM: + case VKD3DSIH_TEX: + case VKD3DSIH_TEXBEM: + case VKD3DSIH_TEXBEML: + case VKD3DSIH_TEXCOORD: + case VKD3DSIH_TEXCRD: + case VKD3DSIH_TEXDEPTH: + case VKD3DSIH_TEXDP3: + case VKD3DSIH_TEXDP3TEX: + case VKD3DSIH_TEXLD: + case VKD3DSIH_TEXLDD: + case VKD3DSIH_TEXLDL: + case VKD3DSIH_TEXM3x2DEPTH: + case VKD3DSIH_TEXM3x2PAD: + case VKD3DSIH_TEXM3x2TEX: + case VKD3DSIH_TEXM3x3: + case VKD3DSIH_TEXM3x3DIFF: + case VKD3DSIH_TEXM3x3PAD: + case VKD3DSIH_TEXM3x3SPEC: + case VKD3DSIH_TEXM3x3TEX: + case VKD3DSIH_TEXM3x3VSPEC: + case VKD3DSIH_TEXREG2AR: + case VKD3DSIH_TEXREG2GB: + case VKD3DSIH_TEXREG2RGB: + /* All of these instructions have fixed destinations—they can + * in some cases be masked, but the destination cannot be + * reallocated to a different set of components. */ + case VKD3DSIH_IDIV: + case VKD3DSIH_IMUL: + case VKD3DSIH_SWAPC: + case VKD3DSIH_UDIV: + case VKD3DSIH_UMUL: + /* These instructions don't have fixed destinations, but they have + * multiple destination and are per-component, meaning that the + * destination masks for each component have to match. + * This is a bit tricky to pull off, so for now we just force + * these to have a fixed mask as well. + * This assumes that the destination masks are equal to each other + * to begin with! */ + reg->fixed_mask = true; + break; + + case VKD3DSIH_SINCOS: + /* sm1 has a fixed destination like LIT, NRM. + * sm4 is two-component and masked, like IMUL. */ + if (version->major < 3) + { + /* We have the additional constraint here that sincos scratches + * whichever components of .xyz it doesn't write. We can achieve + * this by simply adding those components to reg->mask. */ + reg->mask |= 0x7; + } + reg->fixed_mask = true; + break; + + default: + break; + } +} + +static void liveness_tracker_cleanup(struct liveness_tracker *tracker) +{ + vkd3d_free(tracker->ssa_regs); +} + +static enum vkd3d_result track_liveness(struct vsir_program *program, struct liveness_tracker *tracker) +{ + struct liveness_tracker_reg *regs; + unsigned int loop_depth = 0; + unsigned int loop_start = 0; + + memset(tracker, 0, sizeof(*tracker)); + + if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + tracker->ssa_regs = regs; + + for (unsigned int i = 0; i < program->instructions.count; ++i) + { + const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + if (ins->opcode == VKD3DSIH_LOOP || ins->opcode == VKD3DSIH_REP) + { + if (!loop_depth++) + loop_start = i; + } + else if (ins->opcode == VKD3DSIH_ENDLOOP || ins->opcode == VKD3DSIH_ENDREP) + { + if (!--loop_depth) + { + /* Go through the allocator, find anything that was touched + * during the loop, and extend its liveness to the whole range + * of the loop. + * This isn't very sophisticated (e.g. we could try to avoid + * this for registers first written inside a loop body and only + * ever read inside one), but many of the cases that matter are + * affected by other optimizations such as copy propagation + * anyway. + * + * This is overkill for SSA registers. If an SSA register is + * written in loop L and last read in L, we don't need to touch + * its liveness. If it's last read in an inferior loop of L, we + * only need to extend its last-read to the end of L. (And it + * should be illegal for an SSA value to be read in a block + * containing L.) + * We don't try to perform this optimization yet, in the name of + * maximal simplicity, and also because this code is intended to + * be extended to non-SSA values. */ + for (unsigned int j = 0; j < program->ssa_count; ++j) + { + struct liveness_tracker_reg *reg = &tracker->ssa_regs[j]; + + if (reg->first_write > loop_start) + reg->first_write = loop_start; + if (reg->last_access < i) + reg->last_access = i; + } + } + } + + for (unsigned int j = 0; j < ins->dst_count; ++j) + liveness_track_dst(tracker, &ins->dst[j], i, &program->shader_version, ins->opcode); + for (unsigned int j = 0; j < ins->src_count; ++j) + liveness_track_src(tracker, &ins->src[j], i); + } + + return VKD3D_OK; +} + +struct temp_allocator +{ + struct vkd3d_shader_message_context *message_context; + struct temp_allocator_reg + { + uint8_t allocated_mask; + uint32_t temp_id; + } *ssa_regs; + size_t allocated_ssa_count; + enum vkd3d_result result; +}; + +static uint8_t get_available_writemask(const struct temp_allocator *allocator, + struct liveness_tracker *tracker, unsigned int first_write, unsigned int last_access, uint32_t temp_id) +{ + uint8_t writemask = VKD3DSP_WRITEMASK_ALL; + + for (size_t i = 0; i < allocator->allocated_ssa_count; ++i) + { + const struct temp_allocator_reg *reg = &allocator->ssa_regs[i]; + const struct liveness_tracker_reg *liveness_reg = &tracker->ssa_regs[i]; + + /* We do not overlap if first write == last read: + * this is the case where we are allocating the result of that + * expression, e.g. "add r0, r0, r1". */ + + if (reg->temp_id == temp_id + && first_write < liveness_reg->last_access + && last_access > liveness_reg->first_write) + writemask &= ~reg->allocated_mask; + + if (!writemask) + return writemask; + } + + return writemask; +} + +static void temp_allocator_allocate(struct temp_allocator *allocator, struct liveness_tracker *tracker, + struct temp_allocator_reg *reg, const struct liveness_tracker_reg *liveness_reg, uint32_t base_id) +{ + if (!liveness_reg->written) + return; + + for (uint32_t id = base_id;; ++id) + { + uint8_t available_mask = get_available_writemask(allocator, tracker, + liveness_reg->first_write, liveness_reg->last_access, id); + + if (liveness_reg->fixed_mask) + { + if ((available_mask & liveness_reg->mask) == liveness_reg->mask) + { + reg->temp_id = id; + reg->allocated_mask = liveness_reg->mask; + return; + } + } + else + { + /* For SSA values the mask is always zero-based and contiguous. + * We don't correctly handle cases where it's not, currently. */ + VKD3D_ASSERT((liveness_reg->mask | (liveness_reg->mask - 1)) == liveness_reg->mask); + + if (vkd3d_popcount(available_mask) >= vkd3d_popcount(liveness_reg->mask)) + { + reg->temp_id = id; + reg->allocated_mask = vsir_combine_write_masks(available_mask, liveness_reg->mask); + return; + } + } + } +} + +static void temp_allocator_set_src(struct temp_allocator *allocator, struct vkd3d_shader_src_param *src) +{ + struct temp_allocator_reg *reg; + + for (unsigned int k = 0; k < src->reg.idx_count; ++k) + { + if (src->reg.idx[k].rel_addr) + temp_allocator_set_src(allocator, src->reg.idx[k].rel_addr); + } + + if (src->reg.type == VKD3DSPR_SSA) + reg = &allocator->ssa_regs[src->reg.idx[0].offset]; + else + return; + + src->reg.type = VKD3DSPR_TEMP; + src->reg.idx[0].offset = reg->temp_id; + src->swizzle = vsir_combine_swizzles(vsir_swizzle_from_writemask(reg->allocated_mask), src->swizzle); +} + +static uint32_t vsir_map_swizzle(uint32_t swizzle, unsigned int writemask) +{ + unsigned int src_component = 0; + uint32_t ret = 0; + + /* Leave replicate swizzles alone; some instructions need them. */ + if (swizzle == VKD3D_SHADER_SWIZZLE(X, X, X, X) + || swizzle == VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y) + || swizzle == VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z) + || swizzle == VKD3D_SHADER_SWIZZLE(W, W, W, W)) + return swizzle; + + for (unsigned int dst_component = 0; dst_component < VKD3D_VEC4_SIZE; ++dst_component) + { + if (writemask & (1u << dst_component)) + vsir_swizzle_set_component(&ret, dst_component, vsir_swizzle_get_component(swizzle, src_component++)); + } + return ret; +} + +static void vsir_remap_immconst(struct vkd3d_shader_src_param *src, unsigned int writemask) +{ + union vsir_immediate_constant prev = src->reg.u; + unsigned int src_component = 0; + + for (unsigned int dst_component = 0; dst_component < VKD3D_VEC4_SIZE; ++dst_component) + { + if (writemask & (1u << dst_component)) + src->reg.u.immconst_u32[dst_component] = prev.immconst_u32[src_component++]; + } +} + +static void vsir_remap_immconst64(struct vkd3d_shader_src_param *src, unsigned int writemask) +{ + if (writemask == (VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3)) + src->reg.u.immconst_u64[1] = src->reg.u.immconst_u64[0]; +} + +static bool vsir_opcode_is_double(enum vkd3d_shader_opcode opcode) +{ + switch (opcode) + { + case VKD3DSIH_DADD: + case VKD3DSIH_DDIV: + case VKD3DSIH_DFMA: + case VKD3DSIH_DMAX: + case VKD3DSIH_DMIN: + case VKD3DSIH_DMOV: + case VKD3DSIH_DMOVC: + case VKD3DSIH_DMUL: + case VKD3DSIH_DRCP: + case VKD3DSIH_DEQO: + case VKD3DSIH_DGEO: + case VKD3DSIH_DLT: + case VKD3DSIH_DNE: + case VKD3DSIH_DTOF: + case VKD3DSIH_DTOI: + case VKD3DSIH_DTOU: + case VKD3DSIH_FTOD: + return true; + + default: + return false; + } +} + +static void temp_allocator_set_dst(struct temp_allocator *allocator, + struct vkd3d_shader_dst_param *dst, const struct vkd3d_shader_instruction *ins) +{ + struct temp_allocator_reg *reg; + + for (unsigned int k = 0; k < dst->reg.idx_count; ++k) + { + if (dst->reg.idx[k].rel_addr) + temp_allocator_set_src(allocator, dst->reg.idx[k].rel_addr); + } + + if (dst->reg.type == VKD3DSPR_SSA) + reg = &allocator->ssa_regs[dst->reg.idx[0].offset]; + else + return; + + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = reg->temp_id; + if (reg->allocated_mask != dst->write_mask) + { + dst->write_mask = reg->allocated_mask; + + if (vsir_opcode_is_double(ins->opcode)) + { + vkd3d_shader_error(allocator->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Doubles are not currently handled."); + allocator->result = VKD3D_ERROR_NOT_IMPLEMENTED; + } + + for (unsigned int i = 0; i < ins->src_count; ++i) + { + struct vkd3d_shader_src_param *src = &ins->src[i]; + + if (vsir_src_is_masked(ins->opcode, i)) + { + if (src->reg.type == VKD3DSPR_IMMCONST) + vsir_remap_immconst(src, dst->write_mask); + else if (src->reg.type == VKD3DSPR_IMMCONST64) + vsir_remap_immconst64(src, dst->write_mask); + else + src->swizzle = vsir_map_swizzle(src->swizzle, dst->write_mask); + } + } + } +} + +enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + struct temp_allocator allocator = {0}; + struct temp_allocator_reg *regs; + struct liveness_tracker tracker; + uint32_t temp_count = 0; + enum vkd3d_result ret; + + if (!program->ssa_count) + return VKD3D_OK; + + if ((ret = track_liveness(program, &tracker))) + return ret; + + if (!(regs = vkd3d_calloc(program->ssa_count, sizeof(*regs)))) + { + liveness_tracker_cleanup(&tracker); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + allocator.message_context = message_context; + allocator.ssa_regs = regs; + + for (unsigned int i = 0; i < program->ssa_count; ++i) + { + const struct liveness_tracker_reg *liveness_reg = &tracker.ssa_regs[i]; + struct temp_allocator_reg *reg = &allocator.ssa_regs[i]; + + temp_allocator_allocate(&allocator, &tracker, reg, liveness_reg, program->temp_count); + TRACE("Allocated r%u%s to sr%u (liveness %u-%u).\n", + reg->temp_id, debug_vsir_writemask(reg->allocated_mask), i, + liveness_reg->first_write, liveness_reg->last_access); + ++allocator.allocated_ssa_count; + } + + for (unsigned int i = 0; i < program->instructions.count; ++i) + { + const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + /* Make sure we do the srcs first; setting the dst writemask may need + * to remap their swizzles. */ + for (unsigned int j = 0; j < ins->src_count; ++j) + temp_allocator_set_src(&allocator, &ins->src[j]); + for (unsigned int j = 0; j < ins->dst_count; ++j) + temp_allocator_set_dst(&allocator, &ins->dst[j], ins); + } + + /* Rewrite dcl_temps to reflect the new temp count. + * Note that dcl_temps appears once per phase, and should reflect only the + * number of temps needed by that phase. + * Therefore we iterate backwards through the shader, finding the maximum + * register used by any instruction, update the dcl_temps at the beginning + * of each phase, and then reset the temp count back to 0 for the next + * phase (if any). */ + for (int i = program->instructions.count - 1; i >= 0; --i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + if (ins->opcode == VKD3DSIH_DCL_TEMPS) + { + ins->declaration.count = temp_count; + temp_count = 0; + continue; + } + if (temp_count && program->shader_version.major >= 4 + && (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE + || ins->opcode == VKD3DSIH_HS_FORK_PHASE + || ins->opcode == VKD3DSIH_HS_JOIN_PHASE)) + { + /* The phase didn't have a dcl_temps instruction, but we added + * temps here, so we need to insert one. */ + if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) + { + vkd3d_free(regs); + liveness_tracker_cleanup(&tracker); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + ins = &program->instructions.elements[i + 1]; + vsir_instruction_init(ins, &program->instructions.elements[i].location, VKD3DSIH_DCL_TEMPS); + ins->declaration.count = temp_count; + temp_count = 0; + continue; + } + + /* No need to check sources. If we've produced an unwritten source then + * that's a bug somewhere in this pass. */ + for (unsigned int j = 0; j < ins->dst_count; ++j) + { + if (ins->dst[j].reg.type == VKD3DSPR_TEMP) + { + temp_count = max(temp_count, ins->dst[j].reg.idx[0].offset + 1); + program->temp_count = max(program->temp_count, temp_count); + } + } + } + + if (temp_count && program->shader_version.major >= 4) + { + struct vkd3d_shader_instruction *ins; + + if (!shader_instruction_array_insert_at(&program->instructions, 0, 1)) + { + vkd3d_free(regs); + liveness_tracker_cleanup(&tracker); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + ins = &program->instructions.elements[0]; + vsir_instruction_init(ins, &program->instructions.elements[1].location, VKD3DSIH_DCL_TEMPS); + ins->declaration.count = temp_count; + } + + program->ssa_count = 0; + + vkd3d_free(regs); + liveness_tracker_cleanup(&tracker); + return allocator.result; +} + struct validation_context { struct vkd3d_shader_message_context *message_context; @@ -8280,10 +9213,6 @@ static void vsir_validate_descriptor_indices(struct validation_context *ctx, static void vsir_validate_constbuffer_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) { - if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, - "Invalid precision %#x for a CONSTBUFFER register.", reg->precision); - if (reg->dimension != VSIR_DIMENSION_VEC4) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a CONSTBUFFER register.", reg->dimension); @@ -8795,6 +9724,11 @@ static void vsir_validate_src_param(struct validation_context *ctx, [VKD3DSPSM_BIASNEG] = {F32_BIT}, [VKD3DSPSM_SIGN] = {F32_BIT}, [VKD3DSPSM_SIGNNEG] = {F32_BIT}, + [VKD3DSPSM_COMP] = {F32_BIT}, + [VKD3DSPSM_X2] = {F32_BIT}, + [VKD3DSPSM_X2NEG] = {F32_BIT}, + [VKD3DSPSM_DZ] = {F32_BIT}, + [VKD3DSPSM_DW] = {F32_BIT}, }; vsir_validate_register(ctx, &src->reg); @@ -9455,6 +10389,17 @@ static void vsir_validate_elementwise_operation(struct validation_context *ctx, } } +static void vsir_validate_double_elementwise_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VKD3D_DATA_COUNT] = + { + [VKD3D_DATA_DOUBLE] = true, + }; + + vsir_validate_elementwise_operation(ctx, instruction, types); +} + static void vsir_validate_float_elementwise_operation(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -9479,6 +10424,52 @@ static void vsir_validate_logic_elementwise_operation(struct validation_context vsir_validate_elementwise_operation(ctx, instruction, types); } +static void vsir_validate_comparison_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, const bool types[VKD3D_DATA_COUNT]) +{ + enum vkd3d_data_type dst_data_type, src_data_type; + unsigned int i; + + if (instruction->dst_count < 1) + return; + + dst_data_type = instruction->dst[0].reg.data_type; + + if (dst_data_type != VKD3D_DATA_UINT && dst_data_type != VKD3D_DATA_BOOL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid data type %#x for result of comparison operation \"%s\" (%#x).", + dst_data_type, vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); + + if (instruction->src_count < 1) + return; + + src_data_type = instruction->src[0].reg.data_type; + + if (src_data_type >= VKD3D_DATA_COUNT) + return; + + for (i = 1; i < instruction->src_count; ++i) + { + if (instruction->src[i].reg.data_type != src_data_type) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Data type %#x for operand %u doesn't match the first operands data type %#x " + "for comparison operation \"%s\" (%#x).", + instruction->src[i].reg.data_type, i, src_data_type, + vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); + } +} + +static void vsir_validate_double_comparison_operation(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + static const bool types[VKD3D_DATA_COUNT] = + { + [VKD3D_DATA_DOUBLE] = true, + }; + + vsir_validate_comparison_operation(ctx, instruction, types); +} + static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { size_t i; @@ -10171,6 +11162,16 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VKD3DSIH_ASIN] = {1, 1, vsir_validate_float_elementwise_operation}, [VKD3DSIH_ATAN] = {1, 1, vsir_validate_float_elementwise_operation}, [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, + [VKD3DSIH_DADD] = {1, 2, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DDIV] = {1, 2, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DFMA] = {1, 3, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DGEO] = {1, 2, vsir_validate_double_comparison_operation}, + [VKD3DSIH_DIV] = {1, 2, vsir_validate_float_elementwise_operation}, + [VKD3DSIH_DLT] = {1, 2, vsir_validate_double_comparison_operation}, + [VKD3DSIH_DMAX] = {1, 2, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DMIN] = {1, 2, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DMOV] = {1, 1, vsir_validate_double_elementwise_operation}, + [VKD3DSIH_DMUL] = {1, 2, vsir_validate_double_elementwise_operation}, [VKD3DSIH_HS_CONTROL_POINT_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, [VKD3DSIH_HS_DECLS] = {0, 0, vsir_validate_hull_shader_phase}, [VKD3DSIH_HS_FORK_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index 08519787b0a..ac9f7412d56 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -55,8 +55,14 @@ struct msl_generator struct msl_resource_type_info { - size_t read_coord_size; + /* The number of coordinates needed to address/sample the resource type. */ + size_t coord_size; + /* Whether the resource type is an array type. */ bool array; + /* Whether the resource type has a shadow/comparison variant. */ + bool comparison; + /* The type suffix for the resource type. I.e., the "2d_ms" part of + * "texture2d_ms_array" or "depth2d_ms_array". */ const char *type_suffix; }; @@ -78,17 +84,17 @@ static const struct msl_resource_type_info *msl_get_resource_type_info(enum vkd3 { static const struct msl_resource_type_info info[] = { - [VKD3D_SHADER_RESOURCE_NONE] = {0, 0, "none"}, - [VKD3D_SHADER_RESOURCE_BUFFER] = {1, 0, "_buffer"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_1D] = {1, 0, "1d"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_2D] = {2, 0, "2d"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS] = {2, 0, "2d_ms"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_3D] = {3, 0, "3d"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE] = {2, 0, "cube"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY] = {1, 1, "1d_array"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY] = {2, 1, "2d_array"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, "2d_ms_array"}, - [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {2, 1, "cube_array"}, + [VKD3D_SHADER_RESOURCE_NONE] = {0, 0, 0, "none"}, + [VKD3D_SHADER_RESOURCE_BUFFER] = {1, 0, 0, "_buffer"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_1D] = {1, 0, 0, "1d"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_2D] = {2, 0, 1, "2d"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS] = {2, 0, 1, "2d_ms"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_3D] = {3, 0, 0, "3d"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE] = {3, 0, 1, "cube"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY] = {1, 1, 0, "1d"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY] = {2, 1, 1, "2d"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = {2, 1, 1, "2d_ms"}, + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = {3, 1, 1, "cube"}, }; if (!t || t >= ARRAY_SIZE(info)) @@ -228,6 +234,35 @@ static const struct vkd3d_shader_descriptor_binding *msl_get_cbv_binding(const s return NULL; } +static const struct vkd3d_shader_descriptor_binding *msl_get_sampler_binding(const struct msl_generator *gen, + unsigned int register_space, unsigned int register_idx) +{ + const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; + const struct vkd3d_shader_resource_binding *binding; + unsigned int i; + + if (!interface_info) + return NULL; + + for (i = 0; i < interface_info->binding_count; ++i) + { + binding = &interface_info->bindings[i]; + + if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER) + continue; + if (binding->register_space != register_space) + continue; + if (binding->register_index != register_idx) + continue; + if (!msl_check_shader_visibility(gen, binding->shader_visibility)) + continue; + + return &binding->binding; + } + + return NULL; +} + static const struct vkd3d_shader_descriptor_binding *msl_get_srv_binding(const struct msl_generator *gen, unsigned int register_space, unsigned int register_idx, enum vkd3d_shader_resource_type resource_type) { @@ -267,11 +302,17 @@ static void msl_print_cbv_name(struct vkd3d_string_buffer *buffer, unsigned int vkd3d_string_buffer_printf(buffer, "descriptors[%u].buf()", binding); } +static void msl_print_sampler_name(struct vkd3d_string_buffer *buffer, unsigned int binding) +{ + vkd3d_string_buffer_printf(buffer, "descriptors[%u].as()", binding); +} + static void msl_print_srv_name(struct vkd3d_string_buffer *buffer, struct msl_generator *gen, unsigned int binding, - const struct msl_resource_type_info *resource_type_info, enum vkd3d_data_type resource_data_type) + const struct msl_resource_type_info *resource_type_info, enum vkd3d_data_type resource_data_type, bool compare) { - vkd3d_string_buffer_printf(buffer, "descriptors[%u].textype_suffix); + vkd3d_string_buffer_printf(buffer, "descriptors[%u].as<%s%s%s<", + binding, compare ? "depth" : "texture", resource_type_info->type_suffix, + resource_type_info->array ? "_array" : ""); msl_print_resource_datatype(gen, buffer, resource_data_type); vkd3d_string_buffer_printf(buffer, ">>()"); } @@ -877,7 +918,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct "Internal compiler error: Unhandled resource type %#x.", resource_type); resource_type_info = msl_get_resource_type_info(VKD3D_SHADER_RESOURCE_TEXTURE_2D); } - coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->read_coord_size); + coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type))) { @@ -895,7 +936,7 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct read = vkd3d_string_buffer_get(&gen->string_buffers); vkd3d_string_buffer_printf(read, "as_type("); - msl_print_srv_name(read, gen, srv_binding, resource_type_info, data_type); + msl_print_srv_name(read, gen, srv_binding, resource_type_info, data_type, false); vkd3d_string_buffer_printf(read, ".read("); msl_print_src_with_type(read, gen, &ins->src[0], coord_mask, VKD3D_DATA_UINT); if (resource_type_info->array) @@ -920,6 +961,181 @@ static void msl_ld(struct msl_generator *gen, const struct vkd3d_shader_instruct msl_dst_cleanup(&dst, &gen->string_buffers); } +static void msl_sample(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const struct msl_resource_type_info *resource_type_info; + unsigned int resource_id, resource_idx, resource_space; + bool bias, compare, comparison_sampler, grad, lod_zero; + const struct vkd3d_shader_descriptor_binding *binding; + unsigned int sampler_id, sampler_idx, sampler_space; + const struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_resource_type resource_type; + unsigned int srv_binding, sampler_binding; + struct vkd3d_string_buffer *sample; + enum vkd3d_data_type data_type; + uint32_t coord_mask; + struct msl_dst dst; + + bias = ins->opcode == VKD3DSIH_SAMPLE_B; + compare = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; + grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; + lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; + + if (vkd3d_shader_instruction_has_texel_offset(ins)) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled texel sample offset."); + + if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr + || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Descriptor indexing is not supported."); + + resource_id = ins->src[1].reg.idx[0].offset; + resource_idx = ins->src[1].reg.idx[1].offset; + if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors, + VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) + { + resource_space = d->register_space; + resource_type = d->resource_type; + data_type = d->resource_data_type; + } + else + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Undeclared resource descriptor %u.", resource_id); + resource_space = 0; + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + data_type = VKD3D_DATA_FLOAT; + } + + if (resource_type == VKD3D_SHADER_RESOURCE_BUFFER + || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Sampling resource type %#x is not supported.", resource_type); + + if ((resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1D || resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY) + && (bias || grad || lod_zero)) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Resource type %#x does not support mipmapping.", resource_type); + + if (!(resource_type_info = msl_get_resource_type_info(resource_type))) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x.", resource_type); + resource_type_info = msl_get_resource_type_info(VKD3D_SHADER_RESOURCE_TEXTURE_2D); + } + coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); + + if ((binding = msl_get_srv_binding(gen, resource_space, resource_idx, resource_type))) + { + srv_binding = binding->binding; + } + else + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "No descriptor binding specified for SRV %u (index %u, space %u).", + resource_id, resource_idx, resource_space); + srv_binding = 0; + } + + sampler_id = ins->src[2].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[1].offset; + if ((d = vkd3d_shader_find_descriptor(&gen->program->descriptors, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) + { + sampler_space = d->register_space; + comparison_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + + if (compare) + { + if (!comparison_sampler) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); + } + else + { + if (comparison_sampler) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); + } + } + else + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Undeclared sampler descriptor %u.", sampler_id); + sampler_space = 0; + } + + if ((binding = msl_get_sampler_binding(gen, sampler_space, sampler_idx))) + { + sampler_binding = binding->binding; + } + else + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, + "No descriptor binding specified for sampler %u (index %u, space %u).", + sampler_id, sampler_idx, sampler_space); + sampler_binding = 0; + } + + msl_dst_init(&dst, gen, ins, &ins->dst[0]); + sample = vkd3d_string_buffer_get(&gen->string_buffers); + + if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT) + vkd3d_string_buffer_printf(sample, "as_type("); + msl_print_srv_name(sample, gen, srv_binding, resource_type_info, data_type, compare); + if (compare) + vkd3d_string_buffer_printf(sample, ".sample_compare("); + else + vkd3d_string_buffer_printf(sample, ".sample("); + msl_print_sampler_name(sample, sampler_binding); + vkd3d_string_buffer_printf(sample, ", "); + msl_print_src_with_type(sample, gen, &ins->src[0], coord_mask, ins->src[0].reg.data_type); + if (resource_type_info->array) + { + vkd3d_string_buffer_printf(sample, ", uint("); + msl_print_src_with_type(sample, gen, &ins->src[0], coord_mask + 1, ins->src[0].reg.data_type); + vkd3d_string_buffer_printf(sample, ")"); + } + if (compare) + { + if (!resource_type_info->comparison) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_UNSUPPORTED, + "Comparison samplers are not supported with resource type %#x.", resource_type); + vkd3d_string_buffer_printf(sample, ", "); + msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); + } + if (grad) + { + vkd3d_string_buffer_printf(sample, ", gradient%s(", resource_type_info->type_suffix); + msl_print_src_with_type(sample, gen, &ins->src[3], coord_mask, ins->src[3].reg.data_type); + vkd3d_string_buffer_printf(sample, ", "); + msl_print_src_with_type(sample, gen, &ins->src[4], coord_mask, ins->src[4].reg.data_type); + vkd3d_string_buffer_printf(sample, ")"); + } + if (lod_zero) + { + vkd3d_string_buffer_printf(sample, ", level(0.0f)"); + } + if (bias) + { + vkd3d_string_buffer_printf(sample, ", bias("); + msl_print_src_with_type(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); + vkd3d_string_buffer_printf(sample, ")"); + } + vkd3d_string_buffer_printf(sample, ")"); + if (ins->dst[0].reg.data_type == VKD3D_DATA_UINT) + vkd3d_string_buffer_printf(sample, ")"); + if (!compare) + msl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); + + msl_print_assignment(gen, &dst, "%s", sample->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, sample); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) { struct msl_src src; @@ -1086,6 +1302,13 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VKD3DSIH_FTOU: msl_cast(gen, ins, "uint"); break; + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_GRAD: + msl_sample(gen, ins); + break; case VKD3DSIH_GEO: case VKD3DSIH_IGE: msl_relop(gen, ins, ">="); @@ -1631,7 +1854,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader " const device void *ptr;\n" "\n" " template\n" - " constant T &tex() constant\n" + " constant T &as() constant\n" " {\n" " return reinterpret_cast(this->ptr);\n" " }\n" diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 0413cd7c344..2bf6f5d9363 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -3522,8 +3522,12 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind goto done; } - resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER - ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + if (resource_type == VKD3D_SHADER_RESOURCE_NONE) + resource_type_flag = 0; + else if (resource_type == VKD3D_SHADER_RESOURCE_BUFFER) + resource_type_flag = VKD3D_SHADER_BINDING_FLAG_BUFFER; + else + resource_type_flag = VKD3D_SHADER_BINDING_FLAG_IMAGE; if (is_uav_counter) { @@ -3567,7 +3571,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind { const struct vkd3d_shader_resource_binding *current = &shader_interface->bindings[i]; - if (!(current->flags & resource_type_flag)) + if ((current->flags & resource_type_flag) != resource_type_flag) continue; if (!spirv_compiler_check_shader_visibility(compiler, current->shader_visibility)) diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index ae3fa1650bf..01af2f6ebbd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -4463,6 +4463,9 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struc size_t i; int ret; + if ((ret = vsir_allocate_temp_registers(program, message_context))) + return ret; + tpf.program = program; tpf.buffer = NULL; tpf.stat = &stat; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index cefd9f753a1..07e4b913e6f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1503,6 +1503,8 @@ struct vsir_program size_t block_name_count; }; +enum vkd3d_result vsir_allocate_temp_registers(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context); void vsir_program_cleanup(struct vsir_program *program); int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, @@ -1850,6 +1852,8 @@ static inline uint32_t vsir_write_mask_32_from_64(uint32_t write_mask64) } } +const char *debug_vsir_writemask(unsigned int writemask); + static inline uint32_t vsir_swizzle_64_from_32(uint32_t swizzle32) { switch (swizzle32) @@ -1899,6 +1903,12 @@ static inline unsigned int vsir_swizzle_get_component(uint32_t swizzle, unsigned return (swizzle >> VKD3D_SHADER_SWIZZLE_SHIFT(idx)) & VKD3D_SHADER_SWIZZLE_MASK; } +static inline void vsir_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) +{ + *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); + *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); +} + static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t write_mask) { unsigned int i, compacted_swizzle = 0; -- 2.47.2