From ca62b0d06263c5a02f00bc215ec02d39b3c8e63d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 29 Nov 2024 07:14:57 +1100 Subject: [PATCH] Updated vkd3d to a68fd1b0ded735580b0ec9025f75fe02d62966df. --- libs/vkd3d/include/private/vkd3d_common.h | 2 +- libs/vkd3d/include/vkd3d_shader.h | 219 ++ libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 34 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 908 ++---- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 + libs/vkd3d/libs/vkd3d-shader/dxil.c | 36 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 163 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 289 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 143 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 404 ++- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2614 ++++++++++++++--- .../libs/vkd3d-shader/hlsl_constant_ops.c | 149 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 1987 ++++++++++--- libs/vkd3d/libs/vkd3d-shader/msl.c | 465 ++- libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + libs/vkd3d/libs/vkd3d-shader/spirv.c | 709 +++-- libs/vkd3d/libs/vkd3d-shader/tpf.c | 1561 ++-------- .../libs/vkd3d-shader/vkd3d_shader_main.c | 28 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 45 +- libs/vkd3d/libs/vkd3d/command.c | 273 +- libs/vkd3d/libs/vkd3d/device.c | 58 +- libs/vkd3d/libs/vkd3d/state.c | 237 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 63 +- 26 files changed, 6880 insertions(+), 3556 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index ec1dd70c9b2..fd62730f948 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git 
a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index f95caa2f825..af55d63a5c8 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -112,6 +112,11 @@ enum vkd3d_shader_structure_type * \since 1.13 */ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, + /** + * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. + * \since 1.15 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -471,6 +476,109 @@ enum vkd3d_shader_binding_flag VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), }; +/** + * The factor used to interpolate the fragment output colour with fog. + * + * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for specification of the + * interpolation factor as defined here. + * + * The following variables may be used to determine the interpolation factor: + * + * c = The fog coordinate value output from the vertex shader. This is an + * inter-stage varying with the semantic name "FOG" and semantic index 0. + * It may be modified by VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE. + * E = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_END. + * k = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE. + * + * \since 1.15 + */ +enum vkd3d_shader_fog_fragment_mode +{ + /** + * No fog interpolation is applied; + * the output colour is passed through unmodified. + * Equivalently, the fog interpolation factor is 1. + */ + VKD3D_SHADER_FOG_FRAGMENT_NONE = 0x0, + /** + * The fog interpolation factor is 2^-(k * c). + * + * In order to implement traditional exponential fog, as present in + * Direct3D and OpenGL, i.e. + * + * e^-(density * c) + * + * set + * + * k = density * log₂(e) + */ + VKD3D_SHADER_FOG_FRAGMENT_EXP = 0x1, + /** + * The fog interpolation factor is 2^-((k * c)²). + * + * In order to implement traditional square-exponential fog, as present in + * Direct3D and OpenGL, i.e. 
+ * + * e^-((density * c)²) + * + * set + * + * k = density * √log₂(e) + */ + VKD3D_SHADER_FOG_FRAGMENT_EXP2 = 0x2, + /** + * The fog interpolation factor is (E - c) * k. + * + * In order to implement traditional linear fog, as present in Direct3D and + * OpenGL, i.e. + * + * (end - c) / (end - start) + * + * set + * + * E = end + * k = 1 / (end - start) + */ + VKD3D_SHADER_FOG_FRAGMENT_LINEAR = 0x3, +}; + +/** + * The source of the fog varying output by a pre-rasterization shader. + * The fog varying is defined as the output varying with the semantic name "FOG" + * and semantic index 0. + * + * See VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE for further documentation of this + * parameter. + * + * \since 1.15 + */ +enum vkd3d_shader_fog_source +{ + /** + * The source shader is not modified. That is, the fog varying in the target + * shader is the original fog varying if and only if present. + */ + VKD3D_SHADER_FOG_SOURCE_FOG = 0x0, + /** + * If the source shader has a fog varying, it is not modified. + * Otherwise, if the source shader outputs a varying with semantic name + * "COLOR" and semantic index 1 whose index includes a W component, + * said W component is output as fog varying. + * Otherwise, no fog varying is output. + */ + VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W = 0x1, + /** + * The fog source is the Z component of the position output by the vertex + * shader. + */ + VKD3D_SHADER_FOG_SOURCE_Z = 0x2, + /** + * The fog source is the W component of the position output by the vertex + * shader. + */ + VKD3D_SHADER_FOG_SOURCE_W = 0x3, +}; + /** * The manner in which a parameter value is provided to the shader, used in * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. @@ -734,6 +842,97 @@ enum vkd3d_shader_parameter_name * \since 1.14 */ VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, + /** + * Fog mode used in fragment shaders. + * + * The value specified by this parameter must be a member of + * enum vkd3d_shader_fog_fragment_mode. 
+ * + * If not VKD3D_SHADER_FOG_FRAGMENT_NONE, the pixel shader colour output at + * location 0 is linearly interpolated with the fog colour defined by + * VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR. The interpolation factor is + * defined according to the enumerant selected by this parameter. + * The interpolated value is then outputted instead of the original value at + * location 0. + * + * An interpolation factor of 0 specifies to use the fog colour; a factor of + * 1 specifies to use the original colour output. The interpolation factor + * is clamped to the [0, 1] range before interpolating. + * + * The default value is VKD3D_SHADER_FOG_FRAGMENT_NONE. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. + * + * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this + * version of vkd3d-shader. + * + * \since 1.15 + */ + VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE, + /** + * Fog colour. + * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of + * fog. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. + * + * The default value is transparent black, i.e. the vector {0, 0, 0, 0}. + * + * \since 1.15 + */ + VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, + /** + * End coordinate for linear fog. + * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of + * fog. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * The default value is 1.0. + * + * \since 1.15 + */ + VKD3D_SHADER_PARAMETER_NAME_FOG_END, + /** + * Scale value for fog. + * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of + * fog. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * The default value is 1.0. + * + * \since 1.15 + */ + VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, + /** + * Fog source. 
The value specified by this parameter must be a member of + * enum vkd3d_shader_fog_source. + * + * This parameter replaces or suppletes the fog varying output by a + * pre-rasterization shader. The fog varying is defined as the output + * varying with the semantic name "FOG" and semantic index 0. + * + * Together with other fog parameters, this parameter can be used to + * implement fixed function fog, as present in Direct3D versions up to 9, + * if the target environment does not support fog as part of its own + * fixed-function API (as Vulkan and core OpenGL). + * + * The default value is VKD3D_SHADER_FOG_SOURCE_FOG. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. + * + * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this + * version of vkd3d-shader. + * + * \since 1.15 + */ + VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; @@ -2040,6 +2239,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info unsigned int combined_sampler_count; }; +/** + * A chained structure describing the tessellation information in a hull shader. + * + * This structure extends vkd3d_shader_compile_info. + * + * \since 1.15 + */ +struct vkd3d_shader_scan_hull_shader_tessellation_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The tessellation output primitive. */ + enum vkd3d_shader_tessellator_output_primitive output_primitive; + /** The tessellation partitioning mode. */ + enum vkd3d_shader_tessellator_partitioning partitioning; +}; + /** * Data type of a shader varying, returned as part of struct * vkd3d_shader_signature_element. 
diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 7c5444f63a3..8c96befadea 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_BFREV ] = "bfrev", [VKD3DSIH_BRANCH ] = "branch", [VKD3DSIH_BREAK ] = "break", - [VKD3DSIH_BREAKC ] = "breakc", + [VKD3DSIH_BREAKC ] = "break", [VKD3DSIH_BREAKP ] = "breakp", [VKD3DSIH_BUFINFO ] = "bufinfo", [VKD3DSIH_CALL ] = "call", @@ -183,7 +183,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_IDIV ] = "idiv", [VKD3DSIH_IEQ ] = "ieq", [VKD3DSIH_IF ] = "if", - [VKD3DSIH_IFC ] = "ifc", + [VKD3DSIH_IFC ] = "if", [VKD3DSIH_IGE ] = "ige", [VKD3DSIH_ILT ] = "ilt", [VKD3DSIH_IMAD ] = "imad", @@ -815,7 +815,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, usage = "tessfactor"; break; case VKD3D_DECL_USAGE_POSITIONT: - usage = "positionT"; + usage = "positiont"; indexed = true; break; case VKD3D_DECL_USAGE_FOG: @@ -2547,6 +2547,33 @@ static void trace_signature(const struct shader_signature *signature, const char vkd3d_string_buffer_cleanup(&buffer); } +static void trace_io_declarations(const struct vsir_program *program) +{ + struct vkd3d_string_buffer buffer; + bool empty = true; + unsigned int i; + + vkd3d_string_buffer_init(&buffer); + + vkd3d_string_buffer_printf(&buffer, "Input/output declarations:"); + + for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) + { + if (bitmap_is_set(program->io_dcls, i)) + { + empty = false; + vkd3d_string_buffer_printf(&buffer, " %u", i); + } 
+ } + + if (empty) + vkd3d_string_buffer_printf(&buffer, " empty"); + + TRACE("%s\n", buffer.buffer); + + vkd3d_string_buffer_cleanup(&buffer); +} + void vsir_program_trace(const struct vsir_program *program) { const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; @@ -2556,6 +2583,7 @@ void vsir_program_trace(const struct vsir_program *program) trace_signature(&program->input_signature, "Input"); trace_signature(&program->output_signature, "Output"); trace_signature(&program->patch_constant_signature, "Patch-constant"); + trace_io_declarations(program); if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) return; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 9e2eacbcfa6..a931883e8d1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -8,7 +8,7 @@ * Copyright 2006 Ivan Gyurdiev * Copyright 2007-2008 Stefan Dösinger for CodeWeavers * Copyright 2009, 2021 Henri Verbeet for CodeWeavers - * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * Copyright 2019-2020, 2023-2024 Elizabeth Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -25,7 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ -#include "hlsl.h" +#include "vkd3d_shader_private.h" #define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu @@ -235,7 +235,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = /* Arithmetic */ {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, {VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, - {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}}, {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, @@ -248,22 +248,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = 
{VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, - {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, + {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, {VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT}, {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, - {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, + {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP, {2, 0}}, {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, - {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, - {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, - {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}}, - {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,}, - {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, - {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, + {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, + {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, {2, 1}}, + {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}}, + {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, + {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, + {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, /* Matrix */ {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, @@ -274,27 +274,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, /* Constant definitions */ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 0}}, /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IF, 0, 
1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, - {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}}, - - {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, - {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}}, + {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}}, + {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, + {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}}, + {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}}, + {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, + {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, + {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, + {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}}, + {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}}, + {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}}, + {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}}, + {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}}, + {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}}, + + {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, + {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, {0, 0, 0, VKD3DSIH_INVALID}, }; @@ -307,89 +307,84 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, 
{VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, - {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP}, - {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ}, + {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP, {2, 0}}, + {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ, {2, 0}}, {VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3}, - {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4}, - {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN}, - {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, - {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, - {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, - {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, - {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, - {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, - {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, - {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, - {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, + {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4, {1, 2}}, + {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN, {2, 0}}, + {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX, {2, 0}}, + {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, + {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP, {2, 0}}, + {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG, {2, 0}}, {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, - {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, - {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}}, - {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}}, - {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, - {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, - {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM}, - {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, - {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC, {2, 0}}, + {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, {1, 4}}, + {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}}, + {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, + {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, + {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, + {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, + {VKD3D_SM1_OP_SINCOS, 1, 1, 
VKD3DSIH_SINCOS, {3, 0}}, + {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}}, /* Matrix */ - {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, - {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, - {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4}, - {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, - {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, + {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4, {2, 0}}, + {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3, {2, 0}}, + {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4, {2, 0}}, + {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3, {2, 0}}, + {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2, {2, 0}}, /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, + {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL, {2, 0}}, /* Constant definitions */ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 1}}, /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, - {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}}, + {VKD3D_SM1_OP_ENDREP, 0, 
0, VKD3DSIH_ENDREP, {2, 1}}, + {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}}, + {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, + {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}}, + {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}}, + {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, + {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, + {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, + {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}}, + {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}}, + {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}}, + {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}}, + {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}}, + {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}}, /* Texture */ - {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}}, - {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}}, - {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}}, - {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}}, - {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}}, - 
{VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}}, - {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}}, - {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}}, - {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}}, - {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, - {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE}, + {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1, 0}}, + {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}}, + {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, {0, 0}}, + {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, {1, 3}}, + {VKD3D_SM1_OP_TEXDP3TEX, 1, 
1, VKD3DSIH_TEXDP3TEX, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, {1, 3}}, + {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, {1, 3}}, + {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, {1, 4}}, + {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}}, + {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}}, + {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}}, + {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, + {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, + {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE, {1, 4}, {1, 4}}, {0, 0, 0, VKD3DSIH_INVALID}, }; @@ -638,7 +633,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, return; } + /* Normally VSIR mandates that the register mask is a subset of the usage + * mask, and the usage mask is a subset of the signature mask. This is + * doesn't always happen with SM1-3 registers, because of the limited + * flexibility with expressing swizzles. + * + * For example it's easy to find shaders like this: + * ps_3_0 + * [...] + * dcl_texcoord0 v0 + * [...] + * texld r2.xyzw, v0.xyzw, s1.xyzw + * [...] + * + * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to + * compute the signature mask, but the texld instruction apparently uses all + * the components. Of course the last two components are ignored, but + * formally they seem to be used. So we end up with a signature element with + * mask .xy and usage mask .xyzw. + * + * In order to avoid this problem, when generating VSIR code with SM4 + * normalisation level we remove the unused components in the write mask. We + * don't do that when targetting the SM1 normalisation level (i.e., when + * disassembling) so as to generate the same disassembly code as native. 
*/ element->used_mask |= mask; + if (program->normalisation_level >= VSIR_NORMALISED_SM4) + element->used_mask &= element->mask; } static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, @@ -968,6 +988,8 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) sm1->p.program->has_point_size = true; + if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG) + sm1->p.program->has_fog = true; } static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, @@ -1268,6 +1290,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + enum vsir_normalisation_level normalisation_level; const uint32_t *code = compile_info->source.code; size_t code_size = compile_info->source.size; struct vkd3d_shader_version version; @@ -1318,9 +1341,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->start = &code[1]; sm1->end = &code[token_count]; + normalisation_level = VSIR_NORMALISED_SM1; + if (compile_info->target_type != VKD3D_SHADER_TARGET_D3D_ASM) + normalisation_level = VSIR_NORMALISED_SM4; + /* Estimate instruction count to avoid reallocation in most shaders. */ if (!vsir_program_init(program, compile_info, &version, - code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); @@ -1525,387 +1552,73 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS); } -D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( + struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) { - switch (type->class) - { - case HLSL_CLASS_ARRAY: - return hlsl_sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPC_OBJECT; - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_COMPUTE_SHADER: - case HLSL_CLASS_DOMAIN_SHADER: - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_NULL: - break; - } - - vkd3d_unreachable(); -} + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i = 0; -D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) 
-{ - switch (type->class) + for (;;) { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: - switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - /* Actually double behaves differently depending on DLL version: - * For <= 36, it maps to D3DXPT_FLOAT. - * For 37-40, it maps to zero (D3DXPT_VOID). - * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* - * values are mostly compatible with D3DXPT_*). - * However, the latter two cases look like bugs, and a reasonable - * application certainly wouldn't know what to do with them. - * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ - case HLSL_TYPE_DOUBLE: - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - default: - vkd3d_unreachable(); - } - - case HLSL_CLASS_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - - case HLSL_CLASS_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; + info = &d3dbc->opcode_table[i++]; + if (info->vkd3d_opcode == VKD3DSIH_INVALID) + return NULL; - case HLSL_CLASS_ARRAY: - return hlsl_sm1_base_type(type->e.array.type); - - case HLSL_CLASS_STRUCT: - return D3DXPT_VOID; - - case 
HLSL_CLASS_STRING: - return D3DXPT_STRING; - - case HLSL_CLASS_PIXEL_SHADER: - return D3DXPT_PIXELSHADER; - - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPT_VERTEXSHADER; - - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_ERROR: - case HLSL_CLASS_PASS: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_COMPUTE_SHADER: - case HLSL_CLASS_DOMAIN_SHADER: - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_NULL: - break; + if (vkd3d_opcode == info->vkd3d_opcode + && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) + && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) + || !info->max_version.major)) + return info; } - - vkd3d_unreachable(); } -static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( + struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { - const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - unsigned int array_size = hlsl_get_multiarray_size(type); - unsigned int field_count = 0; - size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; + const struct vkd3d_sm1_opcode_info *info; - if (array_type->class == HLSL_CLASS_STRUCT) + if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, ctab_start); - } - - 
fields_offset = bytecode_align(buffer) - ctab_start; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "Opcode %#x not supported for shader profile.", ins->opcode); + d3dbc->failed = true; + return NULL; } - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -} - -static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -{ - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + if (ins->dst_count != info->dst_count) { - if (strcmp(to_sort->name, var->name) < 0) - { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; - } + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid destination count %u for vsir instruction %#x (expected %u).", + ins->dst_count, ins->opcode, info->dst_count); + d3dbc->failed = true; + return NULL; } - - list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm1_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + if (ins->src_count != info->src_count) { - if (var->is_uniform) - sm1_sort_extern(&sorted, var); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, + "Invalid source count %u for vsir instruction %#x (expected %u).", + 
ins->src_count, ins->opcode, info->src_count); + d3dbc->failed = true; + return NULL; } - list_move_tail(&ctx->extern_vars, &sorted); + + return info; } -void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_comment(struct d3dbc_compiler *d3dbc, + uint32_t tag, const struct vkd3d_shader_code *comment) { - size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - ++uniform_count; - - if (var->is_param && var->is_uniform) - { - char *new_name; - - if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) - return; - vkd3d_free((char *)var->name); - var->name = new_name; - } - } - } - - sm1_sort_externs(ctx); - - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - - ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); - vars_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ - - vars_start = bytecode_align(buffer); - set_u32(buffer, vars_offset, vars_start - ctab_start); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - put_u32(buffer, 0); /* name */ - if (r == HLSL_REGSET_NUMERIC) - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); - put_u32(buffer, var->bind_count[r]); - } - else - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); - put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* default value */ - } - } - - uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int r; - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { - size_t var_offset, name_offset; - - if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - - if (var->default_values) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; - unsigned int comp_count = hlsl_type_component_count(var->data_type); - unsigned int default_value_offset; - unsigned int k; - - default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); - set_u32(buffer, 
var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); - - for (k = 0; k < comp_count; ++k) - { - struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); - unsigned int comp_offset; - enum hlsl_regset regset; - - comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); - if (regset == HLSL_REGSET_NUMERIC) - { - union - { - uint32_t u; - float f; - } uni; - - switch (comp_type->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: - if (ctx->double_as_float_alias) - uni.u = var->default_values[k].number.u; - else - uni.u = 0; - break; - - case HLSL_TYPE_INT: - uni.f = var->default_values[k].number.i; - break; - - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - uni.f = var->default_values[k].number.u; - break; - - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - uni.u = var->default_values[k].number.u; - break; - - default: - vkd3d_unreachable(); - } - - set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); - } - } - } + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + size_t offset, start, end; - ++uniform_count; - } - } + offset = put_u32(buffer, 0); - offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); + start = put_u32(buffer, tag); + bytecode_put_bytes(buffer, comment->code, comment->size); + end = bytecode_align(buffer); - ctab_end = bytecode_align(buffer); - set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); } static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) @@ -1914,166 +1627,108 @@ static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); } -struct sm1_instruction +static uint32_t swizzle_from_vsir(uint32_t swizzle) { - 
enum vkd3d_sm1_opcode opcode; - unsigned int flags; - - struct sm1_dst_register - { - enum vkd3d_shader_register_type type; - enum vkd3d_shader_dst_modifier mod; - unsigned int writemask; - uint32_t reg; - } dst; + uint32_t x = vsir_swizzle_get_component(swizzle, 0); + uint32_t y = vsir_swizzle_get_component(swizzle, 1); + uint32_t z = vsir_swizzle_get_component(swizzle, 2); + uint32_t w = vsir_swizzle_get_component(swizzle, 3); - struct sm1_src_register - { - enum vkd3d_shader_register_type type; - enum vkd3d_shader_src_modifier mod; - unsigned int swizzle; - uint32_t reg; - } srcs[4]; - unsigned int src_count; + if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) + ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); - unsigned int has_dst; -}; + return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) + | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) + | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) + | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); +} -static bool is_inconsequential_instr(const struct sm1_instruction *instr) +static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) { - const struct sm1_src_register *src = &instr->srcs[0]; - const struct sm1_dst_register *dst = &instr->dst; + const struct vkd3d_shader_dst_param *dst = &ins->dst[0]; + const struct vkd3d_shader_src_param *src = &ins->src[0]; unsigned int i; - if (instr->opcode != VKD3D_SM1_OP_MOV) + if (ins->opcode != VKD3DSIH_MOV) return false; - if (dst->mod != VKD3DSPDM_NONE) + if (dst->modifiers != VKD3DSPDM_NONE) return false; - if (src->mod != VKD3DSPSM_NONE) + if (src->modifiers != VKD3DSPSM_NONE) return false; - if (src->type != dst->type) + if (src->reg.type != dst->reg.type) return false; - if (src->reg != dst->reg) + if (src->reg.idx[0].offset != dst->reg.idx[0].offset) return false; for (i = 0; i < 4; ++i) { - if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) + if ((dst->write_mask & (1u << i)) && 
(vsir_swizzle_get_component(src->swizzle, i) != i)) return false; } return true; } -static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) { - VKD3D_ASSERT(reg->writemask); + VKD3D_ASSERT(reg->write_mask); put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER - | sm1_encode_register_type(reg->type) - | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) - | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); + | sm1_encode_register_type(reg->reg.type) + | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) + | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg.idx[0].offset); } -static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) { put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER - | sm1_encode_register_type(reg->type) - | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) - | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); + | sm1_encode_register_type(reg->reg.type) + | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) + | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg.idx[0].offset); } -static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) +static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - uint32_t token = instr->opcode; + const struct vkd3d_sm1_opcode_info *info; unsigned int i; + uint32_t token; + + if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) + return; - if (is_inconsequential_instr(instr)) + if (is_inconsequential_instr(ins)) 
return; - token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + token = info->sm1_opcode; + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (ins->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); if (version->major > 1) - token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + token |= (ins->dst_count + ins->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; put_u32(buffer, token); - if (instr->has_dst) - write_sm1_dst_register(buffer, &instr->dst); - - for (i = 0; i < instr->src_count; ++i) - write_sm1_src_register(buffer, &instr->srcs[i]); -}; - -static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( - struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) -{ - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct vkd3d_sm1_opcode_info *info; - unsigned int i = 0; - - for (;;) - { - info = &d3dbc->opcode_table[i++]; - if (info->vkd3d_opcode == VKD3DSIH_INVALID) - return NULL; - - if (vkd3d_opcode == info->vkd3d_opcode - && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) - && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) - || !info->max_version.major)) - return info; - } -} - -static uint32_t swizzle_from_vsir(uint32_t swizzle) -{ - uint32_t x = vsir_swizzle_get_component(swizzle, 0); - uint32_t y = vsir_swizzle_get_component(swizzle, 1); - uint32_t z = vsir_swizzle_get_component(swizzle, 2); - uint32_t w = vsir_swizzle_get_component(swizzle, 3); - - if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) - ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); - - return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) - | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) - | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) - | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); -} - -static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const 
struct vkd3d_shader_src_param *param, - struct sm1_src_register *src, const struct vkd3d_shader_location *loc) -{ - src->mod = param->modifiers; - src->reg = param->reg.idx[0].offset; - src->type = param->reg.type; - src->swizzle = swizzle_from_vsir(param->swizzle); - - if (param->reg.idx[0].rel_addr) + for (i = 0; i < ins->dst_count; ++i) { - vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, - "Unhandled relative addressing on source register."); - d3dbc->failed = true; + if (ins->dst[i].reg.idx[0].rel_addr) + { + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on destination register."); + d3dbc->failed = true; + } + write_sm1_dst_register(buffer, &ins->dst[i]); } -} - -static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param, - struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc) -{ - dst->mod = param->modifiers; - dst->reg = param->reg.idx[0].offset; - dst->type = param->reg.type; - dst->writemask = param->write_mask; - if (param->reg.idx[0].rel_addr) + for (i = 0; i < ins->src_count; ++i) { - vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, - "Unhandled relative addressing on destination register."); - d3dbc->failed = true; + if (ins->src[i].reg.idx[0].rel_addr) + { + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, + "Unhandled relative addressing on source register."); + d3dbc->failed = true; + } + write_sm1_src_register(buffer, &ins->src[i]); } -} +}; static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { @@ -2081,11 +1736,11 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; uint32_t token; - const struct sm1_dst_register reg = + const 
struct vkd3d_shader_dst_param reg = { - .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = ins->dst[0].reg.idx[0].offset, + .reg.type = VKD3DSPR_CONST, + .write_mask = VKD3DSP_WRITEMASK_ALL, + .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, }; token = VKD3D_SM1_OP_DEF; @@ -2103,7 +1758,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; + struct vkd3d_shader_dst_param reg = {0}; uint32_t token; token = VKD3D_SM1_OP_DCL; @@ -2115,9 +1770,9 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); - reg.type = VKD3DSPR_COMBINED_SAMPLER; - reg.writemask = VKD3DSP_WRITEMASK_ALL; - reg.reg = reg_id; + reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.write_mask = VKD3DSP_WRITEMASK_ALL; + reg.reg.idx[0].offset = reg_id; write_sm1_dst_register(buffer, ®); } @@ -2163,61 +1818,6 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3 } } -static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( - struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) -{ - const struct vkd3d_sm1_opcode_info *info; - - if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, - "Opcode %#x not supported for shader profile.", ins->opcode); - d3dbc->failed = true; - return NULL; - } - - if (ins->dst_count != info->dst_count) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, - "Invalid destination count %u for vsir instruction %#x (expected %u).", - ins->dst_count, ins->opcode, info->dst_count); - d3dbc->failed = true; - return 
NULL; - } - if (ins->src_count != info->src_count) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, - "Invalid source count %u for vsir instruction %#x (expected %u).", - ins->src_count, ins->opcode, info->src_count); - d3dbc->failed = true; - return NULL; - } - - return info; -} - -static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc, - const struct vkd3d_shader_instruction *ins) -{ - struct sm1_instruction instr = {0}; - const struct vkd3d_sm1_opcode_info *info; - - if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) - return; - - instr.opcode = info->sm1_opcode; - instr.flags = ins->flags; - instr.has_dst = info->dst_count; - instr.src_count = info->src_count; - - if (instr.has_dst) - sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location); - for (unsigned int i = 0; i < instr.src_count; ++i) - sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location); - - d3dbc_write_instruction(d3dbc, &instr); -} - static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { uint32_t writemask; @@ -2254,7 +1854,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VKD3DSIH_TEX: case VKD3DSIH_TEXKILL: case VKD3DSIH_TEXLDD: - d3dbc_write_vsir_simple_instruction(d3dbc, ins); + d3dbc_write_instruction(d3dbc, ins); break; case VKD3DSIH_EXP: @@ -2271,7 +1871,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str writemask, ins->opcode); d3dbc->failed = true; } - d3dbc_write_vsir_simple_instruction(d3dbc, ins); + d3dbc_write_instruction(d3dbc, ins); break; default: @@ -2287,13 +1887,13 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = 
{0}; + struct vkd3d_shader_dst_param reg = {0}; enum vkd3d_decl_usage usage; uint32_t token, usage_idx; bool ret; if (sm1_register_from_semantic_name(version, element->semantic_name, - element->semantic_index, output, ®.type, ®.reg)) + element->semantic_index, output, ®.reg.type, ®.reg.idx[0].offset)) { usage = 0; usage_idx = 0; @@ -2302,8 +1902,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, { ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); VKD3D_ASSERT(ret); - reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - reg.reg = element->register_index; + reg.reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg.idx[0].offset = element->register_index; } token = VKD3D_SM1_OP_DCL; @@ -2316,7 +1916,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; put_u32(buffer, token); - reg.writemask = element->mask; + reg.write_mask = element->mask; write_sm1_dst_register(buffer, ®); } @@ -2384,9 +1984,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, } put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - - bytecode_put_bytes(buffer, ctab->code, ctab->size); - + d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab); d3dbc_write_semantic_dcls(&d3dbc); d3dbc_write_program_instructions(&d3dbc); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index f6ac8e0829e..81af62f7810 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -115,6 +115,14 @@ static uint32_t read_u32(const char **ptr) return ret; } +static uint64_t read_u64(const char **ptr) +{ + uint64_t ret; + memcpy(&ret, *ptr, sizeof(ret)); + *ptr += sizeof(ret); + return ret; +} + static float read_float(const char **ptr) { union @@ -502,6 +510,28 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, 
return ret; } +static int shdr_parse_features(const struct vkd3d_shader_dxbc_section_desc *section, + struct vkd3d_shader_message_context *message_context, struct vsir_features *f) +{ + const char *data = section->data.code; + const char *ptr = data; + uint64_t flags; + + if (!require_space(0, 1, sizeof(uint64_t), section->data.size)) + { + WARN("Invalid data size %#zx.\n", section->data.size); + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, + "SFI0 section size %zu is too small to contain flags.\n", section->data.size); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + flags = read_u64(&ptr); + + if (flags & DXBC_SFI0_REQUIRES_ROVS) + f->rovs = true; + + return VKD3D_OK; +} + static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *context) { @@ -558,6 +588,11 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, desc->byte_code_size = section->data.size; break; + case TAG_SFI0: + if ((ret = shdr_parse_features(section, message_context, &desc->features)) < 0) + return ret; + break; + case TAG_AON9: TRACE("Skipping AON9 shader code chunk.\n"); break; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 7099bcc9ce2..4493602dfb7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3824,7 +3824,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par } static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - enum vkd3d_shader_sysval_semantic sysval_semantic) + enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input) { switch (sysval_semantic) { @@ -3834,7 +3834,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( case VKD3D_SHADER_SV_SAMPLE_INDEX: return VKD3DSPR_NULL; case VKD3D_SHADER_SV_COVERAGE: - return VKD3DSPR_COVERAGE; + return is_input ? 
VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK; case VKD3D_SHADER_SV_DEPTH: return VKD3DSPR_DEPTHOUT; case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: @@ -3884,7 +3884,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade param = ¶ms[i]; if (e->register_index == UINT_MAX - && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) + && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL) { dst_param_io_init(param, e, io_reg_type); continue; @@ -9348,7 +9348,7 @@ static void signature_element_read_additional_element_values(struct signature_el } static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, - struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) + struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input) { unsigned int i, j, column_count, operand_count, index; const struct sm6_metadata_node *node, *element_node; @@ -9466,7 +9466,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const if ((is_register = e->register_index == UINT_MAX)) { - if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) + if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID) { WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, @@ -9578,17 +9578,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons } if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], - &program->input_signature, tessellator_domain)) < 0) + &program->input_signature, tessellator_domain, true)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], - &program->output_signature, 
tessellator_domain)) < 0) + &program->output_signature, tessellator_domain, false)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], - &program->patch_constant_signature, tessellator_domain)) < 0) + &program->patch_constant_signature, tessellator_domain, false)) < 0) { return ret; } @@ -9717,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); ins->declaration.tessellator_domain = tessellator_domain; + sm6->p.program->tess_domain = tessellator_domain; } -static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, - const char *type) +static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, + unsigned int count, bool allow_zero, const char *type) { - if (!count || count > 32) + if ((!count && !allow_zero) || count > 32) { WARN("%s control point count %u invalid.\n", type, count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, @@ -9744,6 +9745,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); ins->declaration.tessellator_partitioning = tessellator_partitioning; + + sm6->p.program->tess_partitioning = tessellator_partitioning; } static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, @@ -9760,6 +9763,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); ins->declaration.tessellator_output_primitive = primitive; + + sm6->p.program->tess_output_primitive = primitive; } static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) @@ -9951,7 +9956,7 @@ static enum vkd3d_tessellator_domain 
sm6_parser_ds_properties_init(struct sm6_pa } sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); - sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); + sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); sm6->p.program->input_control_point_count = operands[1]; return operands[0]; @@ -10010,9 +10015,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa } } - sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); + sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); program->input_control_point_count = operands[1]; - sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); + sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); program->output_control_point_count = operands[2]; sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); @@ -10351,7 +10356,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. 
*/ count = max(token_count, 400) - 400; if (!vsir_program_init(program, compile_info, &version, - (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) + (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; @@ -10378,6 +10383,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro *input_signature = dxbc_desc->input_signature; *output_signature = dxbc_desc->output_signature; *patch_constant_signature = dxbc_desc->patch_constant_signature; + program->features = dxbc_desc->features; memset(dxbc_desc, 0, sizeof(*dxbc_desc)); block = &sm6->root_block; diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index e22177e1e30..3795add87c7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -36,6 +36,16 @@ struct fx_4_binary_type uint32_t typeinfo; }; +struct fx_5_shader +{ + uint32_t offset; + uint32_t sodecl[4]; + uint32_t sodecl_count; + uint32_t rast_stream; + uint32_t iface_bindings_count; + uint32_t iface_bindings; +}; + struct string_entry { struct rb_entry entry; @@ -550,6 +560,8 @@ enum fx_4_type_constants FX_4_ASSIGNMENT_VARIABLE = 0x2, FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, + FX_4_ASSIGNMENT_INLINE_SHADER = 0x7, + FX_5_ASSIGNMENT_INLINE_SHADER = 0x8, }; static const uint32_t fx_4_numeric_base_types[] = @@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable(); case HLSL_CLASS_VOID: @@ -1008,8 +1021,8 @@ static uint32_t get_fx_2_type_class(const struct hlsl_type *type) return hlsl_sm1_class(type); } -static uint32_t write_fx_2_parameter(const struct hlsl_type 
*type, const char *name, const struct hlsl_semantic *semantic, - struct fx_write_context *fx) +static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, + const struct hlsl_semantic *semantic, bool is_combined_sampler, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; uint32_t semantic_offset, offset, elements_count = 0, name_offset; @@ -1025,7 +1038,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n name_offset = write_string(name, fx); semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; - offset = put_u32(buffer, hlsl_sm1_base_type(type)); + offset = put_u32(buffer, hlsl_sm1_base_type(type, is_combined_sampler)); put_u32(buffer, get_fx_2_type_class(type)); put_u32(buffer, name_offset); put_u32(buffer, semantic_offset); @@ -1061,7 +1074,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n /* Validated in check_invalid_object_fields(). */ VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); - write_fx_2_parameter(field->type, field->name, &field->semantic, fx); + write_fx_2_parameter(field->type, field->name, &field->semantic, false, fx); } } @@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: /* This cannot appear as an extern variable. 
*/ break; } @@ -1321,7 +1335,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) continue; - desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); value_offset = write_fx_2_initial_value(var, fx); flags = 0; @@ -1344,7 +1358,7 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte struct vkd3d_bytecode_buffer *buffer = &fx->structured; uint32_t desc_offset, value_offset; - desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); value_offset = write_fx_2_initial_value(var, fx); put_u32(buffer, desc_offset); @@ -1834,6 +1848,7 @@ enum state_property_component_type FX_BLEND, FX_VERTEXSHADER, FX_PIXELSHADER, + FX_GEOMETRYSHADER, FX_COMPONENT_TYPE_COUNT, }; @@ -2065,6 +2080,7 @@ fx_4_states[] = { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, + { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 }, { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, @@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en static int fx_2_parse(struct fx_parser *parser) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented."); return -1; } @@ -3120,7 +3136,7 @@ static void 
fx_parse_fx_4_annotations(struct fx_parser *parser) else { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Only numeric and string types are supported in annotations.\n"); + "Only numeric and string types are supported in annotations."); } if (type.element_count) @@ -3210,27 +3226,13 @@ static void fx_parse_buffers(struct fx_parser *parser) } } -static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) { struct vkd3d_shader_compile_info info = { 0 }; struct vkd3d_shader_code output; - uint32_t data_size, offset; const void *data = NULL; const char *p, *q, *end; - struct fx_5_shader - { - uint32_t offset; - uint32_t sodecl[4]; - uint32_t sodecl_count; - uint32_t rast_stream; - uint32_t iface_bindings_count; - uint32_t iface_bindings; - } shader5; - struct fx_4_gs_so - { - uint32_t offset; - uint32_t sodecl; - } gs_so; + uint32_t data_size; int ret; static const struct vkd3d_shader_compile_option options[] = @@ -3238,35 +3240,9 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, }; - switch (object_type) - { - case FX_4_OBJECT_TYPE_PIXEL_SHADER: - case FX_4_OBJECT_TYPE_VERTEX_SHADER: - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: - offset = fx_parser_read_u32(parser); - break; - - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: - fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); - offset = gs_so.offset; - break; - - case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: - case FX_5_OBJECT_TYPE_COMPUTE_SHADER: - case FX_5_OBJECT_TYPE_HULL_SHADER: - case FX_5_OBJECT_TYPE_DOMAIN_SHADER: - fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); - offset = shader5.offset; - break; - - default: - parser->failed = true; - return; - } - - fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); + 
fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); if (data_size) - data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); + data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size); if (!data) return; @@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) { fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, - "Failed to disassemble shader blob.\n"); + "Failed to disassemble shader blob."); return; } parse_fx_print_indent(parser); @@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int parse_fx_print_indent(parser); vkd3d_string_buffer_printf(&parser->buffer, "}"); - if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) + if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) { vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", - fx_4_get_string(parser, gs_so.sodecl)); + fx_4_get_string(parser, shader->sodecl[0])); } else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) { - for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) + for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i) { - if (shader5.sodecl[i]) + if (shader->sodecl[i]) vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", - i, fx_4_get_string(parser, shader5.sodecl[i])); + i, fx_4_get_string(parser, shader->sodecl[i])); } - if (shader5.sodecl_count) - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); + if (shader->sodecl_count) + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); } vkd3d_shader_free_shader_code(&output); } +static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) +{ + struct fx_5_shader 
shader = { 0 }; + + switch (object_type) + { + case FX_4_OBJECT_TYPE_PIXEL_SHADER: + case FX_4_OBJECT_TYPE_VERTEX_SHADER: + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: + shader.offset = fx_parser_read_u32(parser); + break; + + case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: + shader.offset = fx_parser_read_u32(parser); + shader.sodecl[0] = fx_parser_read_u32(parser); + break; + + case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: + case FX_5_OBJECT_TYPE_COMPUTE_SHADER: + case FX_5_OBJECT_TYPE_HULL_SHADER: + case FX_5_OBJECT_TYPE_DOMAIN_SHADER: + fx_parser_read_u32s(parser, &shader, sizeof(shader)); + break; + + default: + parser->failed = true; + return; + } + + fx_4_parse_shader_blob(parser, object_type, &shader); +} + static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) { switch (type->typeinfo) @@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 [FX_UINT8] = "byte", }; const struct rhs_named_value *named_value; + struct fx_5_shader shader = { 0 }; + unsigned int shader_type = 0; uint32_t i, j, comp_count; struct fx_4_state *state; @@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), sizeof(*fx_4_states), fx_4_state_id_compare))) { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id); break; } @@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), fx_4_get_string(parser, index.index)); break; + case FX_4_ASSIGNMENT_INLINE_SHADER: + case FX_5_ASSIGNMENT_INLINE_SHADER: + { + bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER; + + if (shader5) + fx_parser_read_unstructured(parser, &shader, 
entry.value, sizeof(shader)); + else + fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t)); + + if (state->type == FX_PIXELSHADER) + shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER; + else if (state->type == FX_VERTEXSHADER) + shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER; + else if (state->type == FX_GEOMETRYSHADER) + shader_type = shader5 ? FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO; + else if (state->type == FX_HULLSHADER) + shader_type = FX_5_OBJECT_TYPE_HULL_SHADER; + else if (state->type == FX_DOMAINSHADER) + shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER; + else if (state->type == FX_COMPUTESHADER) + shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER; + + vkd3d_string_buffer_printf(&parser->buffer, "\n"); + parse_fx_start_indent(parser); + fx_4_parse_shader_blob(parser, shader_type, &shader); + parse_fx_end_indent(parser); + break; + } default: fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, - "Unsupported assignment type %u.\n", entry.type); + "Unsupported assignment type %u.", entry.type); } vkd3d_string_buffer_printf(&parser->buffer, ";\n"); } diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 0df0e30f399..ab6604bd703 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -1507,13 +1507,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VKD3DSIH_DCL_INDEXABLE_TEMP: shader_glsl_dcl_indexable_temp(gen, ins); break; - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_NOP: break; case VKD3DSIH_DEFAULT: @@ -2476,7 +2469,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) return ret; - 
VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); vkd3d_glsl_generator_init(&generator, program, compile_info, descriptor_info, combined_sampler_info, message_context); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 96de18dc886..84da2fcbc9f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -287,6 +287,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) case HLSL_CLASS_UAV: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_VOID: case HLSL_CLASS_NULL: return false; @@ -434,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } @@ -525,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_STREAM_OUTPUT: break; } vkd3d_unreachable(); @@ -680,6 +683,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty case HLSL_CLASS_SCALAR: case HLSL_CLASS_CONSTANT_BUFFER: case HLSL_CLASS_NULL: + case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable(); } type = next_type; @@ -898,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba return type; } +struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, + enum hlsl_so_object_type so_type, struct hlsl_type *data_type) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_STREAM_OUTPUT; + type->e.so.so_type = so_type; + type->e.so.type = data_type; + + list_add_tail(&ctx->types, &type->entry); + + return type; +} + struct hlsl_type 
*hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count) { @@ -1086,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_STREAM_OUTPUT: break; } @@ -1157,6 +1178,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_CONSTANT_BUFFER: return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + case HLSL_CLASS_STREAM_OUTPUT: + if (t1->e.so.so_type != t2->e.so.so_type) + return false; + return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -1695,22 +1721,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * return &s->node; } -struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, - struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_vsir_instruction_ref *vsir_instr; - - if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) - return NULL; - init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); - vsir_instr->vsir_instr_idx = vsir_instr_idx; - - if (reg) - vsir_instr->node.reg = *reg; - - return &vsir_instr->node; -} - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { @@ -1844,22 +1854,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct return &store->node; } -struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location 
*loc) { struct hlsl_ir_swizzle *swizzle; struct hlsl_type *type; + VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR); + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) return NULL; - VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); - if (components == 1) + if (component_count > 1) + type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); + else type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); + init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); + hlsl_src_from_node(&swizzle->val, val); + swizzle->u.vector = s; + + return &swizzle->node; +} + +struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, + unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_swizzle *swizzle; + struct hlsl_type *type; + + VKD3D_ASSERT(val->data_type->class == HLSL_CLASS_MATRIX); + + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) + return NULL; + if (component_count > 1) + type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); else - type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); + type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); - swizzle->swizzle = s; + swizzle->u.matrix = s; + return &swizzle->node; } @@ -2054,8 +2087,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return &jump->node; } -struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop; @@ -2066,6 +2099,10 @@ struct hlsl_ir_node 
*hlsl_new_loop(struct hlsl_ctx *ctx, hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); + hlsl_block_init(&loop->iter); + if (iter) + hlsl_block_add_block(&loop->iter, iter); + loop->unroll_type = unroll_type; loop->unroll_limit = unroll_limit; return &loop->node; @@ -2221,14 +2258,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { + struct hlsl_block iter, body; struct hlsl_ir_node *dst; - struct hlsl_block body; + + if (!clone_block(ctx, &iter, &src->iter, map)) + return NULL; if (!clone_block(ctx, &body, &src->body, map)) + { + hlsl_block_cleanup(&iter); return NULL; + } - if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) { + hlsl_block_cleanup(&iter); hlsl_block_cleanup(&body); return NULL; } @@ -2310,8 +2354,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { - return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, - map_instr(map, src->val.node), &src->node.loc); + if (src->val.node->data_type->class == HLSL_CLASS_MATRIX) + return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc); + else + return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc); } static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, @@ -2533,9 +2581,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); - - case 
HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_unreachable(); } vkd3d_unreachable(); @@ -2693,10 +2738,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha return NULL; } -struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) +static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl_type *type) { - struct vkd3d_string_buffer *string, *inner_string; - static const char *const base_types[] = { [HLSL_TYPE_FLOAT] = "float", @@ -2720,31 +2763,28 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", }; - if (!(string = hlsl_get_string_buffer(ctx))) - return NULL; - if (type->name) { - vkd3d_string_buffer_printf(string, "%s", type->name); - return string; + vkd3d_string_buffer_printf(buffer, "%s", type->name); + return; } switch (type->class) { case HLSL_CLASS_SCALAR: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); - return string; + vkd3d_string_buffer_printf(buffer, "%s", base_types[type->e.numeric.type]); + return; case HLSL_CLASS_VECTOR: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; + vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->dimx); + return; case HLSL_CLASS_MATRIX: VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; + vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); + return; case HLSL_CLASS_ARRAY: { @@ -2753,88 +2793,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru for (t = type; t->class == HLSL_CLASS_ARRAY; t = 
t->e.array.type) ; - if ((inner_string = hlsl_type_to_string(ctx, t))) - { - vkd3d_string_buffer_printf(string, "%s", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - + hlsl_dump_type(buffer, t); for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) { if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - vkd3d_string_buffer_printf(string, "[]"); + vkd3d_string_buffer_printf(buffer, "[]"); else - vkd3d_string_buffer_printf(string, "[%u]", t->e.array.elements_count); + vkd3d_string_buffer_printf(buffer, "[%u]", t->e.array.elements_count); } - return string; + return; } case HLSL_CLASS_STRUCT: - vkd3d_string_buffer_printf(string, ""); - return string; + vkd3d_string_buffer_printf(buffer, ""); + return; case HLSL_CLASS_TEXTURE: if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { - vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); - return string; + vkd3d_string_buffer_printf(buffer, "ByteAddressBuffer"); + return; } if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { - vkd3d_string_buffer_printf(string, "Texture"); - return string; + vkd3d_string_buffer_printf(buffer, "Texture"); + return; } VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) { - vkd3d_string_buffer_printf(string, "Buffer"); + vkd3d_string_buffer_printf(buffer, "Buffer<"); } else { VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); - vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); + vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]); } - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; + hlsl_dump_type(buffer, type->e.resource.format); + 
vkd3d_string_buffer_printf(buffer, ">"); + return; case HLSL_CLASS_UAV: if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { - vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); - return string; + vkd3d_string_buffer_printf(buffer, "RWByteAddressBuffer"); + return; } if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - vkd3d_string_buffer_printf(string, "RWBuffer"); + vkd3d_string_buffer_printf(buffer, "RWBuffer<"); else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); + vkd3d_string_buffer_printf(buffer, "RWStructuredBuffer<"); else - vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; + vkd3d_string_buffer_printf(buffer, "RWTexture%s<", dimensions[type->sampler_dim]); + hlsl_dump_type(buffer, type->e.resource.format); + vkd3d_string_buffer_printf(buffer, ">"); + return; case HLSL_CLASS_CONSTANT_BUFFER: - vkd3d_string_buffer_printf(string, "ConstantBuffer"); - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) - { - vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); - hlsl_release_string_buffer(ctx, inner_string); - } - return string; + vkd3d_string_buffer_printf(buffer, "ConstantBuffer<"); + hlsl_dump_type(buffer, type->e.resource.format); + vkd3d_string_buffer_printf(buffer, ">"); + return; case HLSL_CLASS_ERROR: - vkd3d_string_buffer_printf(string, ""); - return string; + vkd3d_string_buffer_printf(buffer, ""); + return; + + case HLSL_CLASS_STREAM_OUTPUT: + if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) + vkd3d_string_buffer_printf(buffer, "PointStream<"); + else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) + vkd3d_string_buffer_printf(buffer, "LineStream<"); + else + 
vkd3d_string_buffer_printf(buffer, "TriangleStream<"); + hlsl_dump_type(buffer, type->e.so.type); + vkd3d_string_buffer_printf(buffer, ">"); + return; case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: @@ -2857,8 +2894,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru break; } - vkd3d_string_buffer_printf(string, ""); - return string; + vkd3d_string_buffer_printf(buffer, ""); +} + +struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) +{ + struct vkd3d_string_buffer *buffer; + + if (!(buffer = hlsl_get_string_buffer(ctx))) + return NULL; + hlsl_dump_type(buffer, type); + return buffer; } struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, @@ -2968,7 +3014,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", - [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", }; if (type >= ARRAY_SIZE(names)) @@ -3022,7 +3067,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); hlsl_release_string_buffer(ctx, string); } - vkd3d_string_buffer_printf(buffer, "%s %s", debug_hlsl_type(ctx, var->data_type), var->name); + hlsl_dump_type(buffer, var->data_type); + vkd3d_string_buffer_printf(buffer, " %s", var->name); if (var->semantic.name) vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); } @@ -3103,34 +3149,28 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) return vkd3d_dbg_sprintf(".%s", string); } -static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) +void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, + struct vkd3d_string_buffer 
*buffer, const struct hlsl_ir_function_decl *f) { - const struct hlsl_ir_function_decl *decl = call->decl; - struct vkd3d_string_buffer *string; size_t i; - if (!(string = hlsl_type_to_string(ctx, decl->return_type))) - return; - - vkd3d_string_buffer_printf(buffer, "call %s %s(", string->buffer, decl->func->name); - hlsl_release_string_buffer(ctx, string); - - for (i = 0; i < decl->parameters.count; ++i) + hlsl_dump_type(buffer, f->return_type); + vkd3d_string_buffer_printf(buffer, " %s(", f->func->name); + for (i = 0; i < f->parameters.count; ++i) { - const struct hlsl_ir_var *param = decl->parameters.vars[i]; - - if (!(string = hlsl_type_to_string(ctx, param->data_type))) - return; - if (i) vkd3d_string_buffer_printf(buffer, ", "); - vkd3d_string_buffer_printf(buffer, "%s", string->buffer); - - hlsl_release_string_buffer(ctx, string); + dump_ir_var(ctx, buffer, f->parameters.vars[i]); } vkd3d_string_buffer_printf(buffer, ")"); } +static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) +{ + vkd3d_string_buffer_printf(buffer, "call "); + hlsl_dump_ir_function_decl(ctx, buffer, call->decl); +} + static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant) { struct hlsl_type *type = constant->node.data_type; @@ -3201,13 +3241,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_LOG2] = "log2", [HLSL_OP1_LOGIC_NOT] = "!", [HLSL_OP1_NEG] = "-", - [HLSL_OP1_NRM] = "nrm", [HLSL_OP1_RCP] = "rcp", [HLSL_OP1_REINTERPRET] = "reinterpret", [HLSL_OP1_ROUND] = "round", [HLSL_OP1_RSQ] = "rsq", [HLSL_OP1_SAT] = "sat", - [HLSL_OP1_SIGN] = "sign", [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", @@ -3217,7 +3255,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_BIT_AND] = "&", [HLSL_OP2_BIT_OR] = "|", [HLSL_OP2_BIT_XOR] = "^", - [HLSL_OP2_CRS] = "crs", [HLSL_OP2_DIV] = "/", [HLSL_OP2_DOT] = "dot", 
[HLSL_OP2_EQUAL] = "==", @@ -3402,11 +3439,12 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls { vkd3d_string_buffer_printf(buffer, "."); for (i = 0; i < swizzle->node.data_type->dimx; ++i) - vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); + vkd3d_string_buffer_printf(buffer, "_m%u%u", + swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x); } else { - vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx)); + vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->dimx)); } } @@ -3562,11 +3600,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", - hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); - break; } } @@ -3719,6 +3752,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct hlsl_ir_loop *loop) { hlsl_block_cleanup(&loop->body); + hlsl_block_cleanup(&loop->iter); vkd3d_free(loop); } @@ -3875,10 +3909,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); - break; } } @@ -3977,8 +4007,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) { + unsigned int src_component = 0; uint32_t ret = 0; - unsigned int i; /* Leave replicate swizzles alone; some instructions need them. 
*/ if (swizzle == HLSL_SWIZZLE(X, X, X, X) @@ -3987,13 +4017,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) || swizzle == HLSL_SWIZZLE(W, W, W, W)) return swizzle; - for (i = 0; i < 4; ++i) + for (unsigned int dst_component = 0; dst_component < 4; ++dst_component) { - if (writemask & (1 << i)) - { - ret |= (swizzle & 3) << (i * 2); - swizzle >>= 2; - } + if (writemask & (1 << dst_component)) + hlsl_swizzle_set_component(&ret, dst_component, hlsl_swizzle_get_component(swizzle, src_component++)); } return ret; } @@ -4046,7 +4073,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim for (i = 0; i < dim; ++i) { unsigned int s = hlsl_swizzle_get_component(second, i); - ret |= hlsl_swizzle_get_component(first, s) << HLSL_SWIZZLE_SHIFT(i); + hlsl_swizzle_set_component(&ret, i, hlsl_swizzle_get_component(first, s)); } return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 075c76cb0e2..7c9547a1c01 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ #include "vkd3d_shader_private.h" #include "wine/rbtree.h" -#include "d3dcommon.h" #include "d3dx9shader.h" /* The general IR structure is inspired by Mesa GLSL hir, even though the code @@ -51,31 +50,17 @@ * DEALINGS IN THE SOFTWARE. 
*/ -#define HLSL_SWIZZLE_X (0u) -#define HLSL_SWIZZLE_Y (1u) -#define HLSL_SWIZZLE_Z (2u) -#define HLSL_SWIZZLE_W (3u) - -#define HLSL_SWIZZLE(x, y, z, w) \ - (((HLSL_SWIZZLE_ ## x) << 0) \ - | ((HLSL_SWIZZLE_ ## y) << 2) \ - | ((HLSL_SWIZZLE_ ## z) << 4) \ - | ((HLSL_SWIZZLE_ ## w) << 6)) - -#define HLSL_SWIZZLE_MASK (0x3u) -#define HLSL_SWIZZLE_SHIFT(idx) (2u * (idx)) +#define HLSL_SWIZZLE VKD3D_SHADER_SWIZZLE static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned int idx) { - return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; + return vsir_swizzle_get_component(swizzle, idx); } -static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) +static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) { - return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), - hlsl_swizzle_get_component(swizzle, 1), - hlsl_swizzle_get_component(swizzle, 2), - hlsl_swizzle_get_component(swizzle, 3)); + *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); + *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); } enum hlsl_type_class @@ -105,6 +90,7 @@ enum hlsl_type_class HLSL_CLASS_GEOMETRY_SHADER, HLSL_CLASS_CONSTANT_BUFFER, HLSL_CLASS_BLEND_STATE, + HLSL_CLASS_STREAM_OUTPUT, HLSL_CLASS_VOID, HLSL_CLASS_NULL, HLSL_CLASS_ERROR, @@ -142,6 +128,13 @@ enum hlsl_sampler_dim /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ }; +enum hlsl_so_object_type +{ + HLSL_STREAM_OUTPUT_POINT_STREAM, + HLSL_STREAM_OUTPUT_LINE_STREAM, + HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, +}; + enum hlsl_regset { HLSL_REGSET_SAMPLERS, @@ -220,6 +213,12 @@ struct hlsl_type } resource; /* Additional field to distinguish object types. Currently used only for technique types. */ unsigned int version; + /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. 
*/ + struct + { + struct hlsl_type *type; + enum hlsl_so_object_type so_type; + } so; } e; /* Number of numeric register components used by one value of this type, for each regset. @@ -330,8 +329,6 @@ enum hlsl_ir_node_type HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, - - HLSL_IR_VSIR_INSTRUCTION_REF, }; /* Common data for every type of IR instruction node. */ @@ -524,6 +521,10 @@ struct hlsl_ir_var * element of a struct, and thus needs to be aligned when packed in the signature. */ bool force_align; + /* Whether this is a sampler that was created from the combination of a + * sampler and a texture for SM<4 backwards compatibility. */ + bool is_combined_sampler; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -644,21 +645,30 @@ struct hlsl_ir_if struct hlsl_block else_block; }; -enum hlsl_ir_loop_unroll_type +enum hlsl_loop_unroll_type +{ + HLSL_LOOP_UNROLL, + HLSL_LOOP_FORCE_UNROLL, + HLSL_LOOP_FORCE_LOOP +}; + +enum hlsl_loop_type { - HLSL_IR_LOOP_UNROLL, - HLSL_IR_LOOP_FORCE_UNROLL, - HLSL_IR_LOOP_FORCE_LOOP + HLSL_LOOP_FOR, + HLSL_LOOP_WHILE, + HLSL_LOOP_DO_WHILE }; struct hlsl_ir_loop { struct hlsl_ir_node node; + struct hlsl_block iter; /* loop condition is stored in the body (as "if (!condition) break;") */ struct hlsl_block body; + enum hlsl_loop_type type; unsigned int next_index; /* liveness index of the end of the loop */ unsigned int unroll_limit; - enum hlsl_ir_loop_unroll_type unroll_type; + enum hlsl_loop_unroll_type unroll_type; }; struct hlsl_ir_switch_case @@ -703,13 +713,11 @@ enum hlsl_ir_expr_op HLSL_OP1_LOG2, HLSL_OP1_LOGIC_NOT, HLSL_OP1_NEG, - HLSL_OP1_NRM, HLSL_OP1_RCP, HLSL_OP1_REINTERPRET, HLSL_OP1_ROUND, HLSL_OP1_RSQ, HLSL_OP1_SAT, - HLSL_OP1_SIGN, HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ HLSL_OP1_SQRT, @@ -719,7 +727,6 @@ enum hlsl_ir_expr_op HLSL_OP2_BIT_AND, HLSL_OP2_BIT_OR, HLSL_OP2_BIT_XOR, - HLSL_OP2_CRS, HLSL_OP2_DIV, 
HLSL_OP2_DOT, HLSL_OP2_EQUAL, @@ -781,7 +788,17 @@ struct hlsl_ir_swizzle { struct hlsl_ir_node node; struct hlsl_src val; - uint32_t swizzle; + union + { + uint32_t vector; + struct hlsl_matrix_swizzle + { + struct + { + uint8_t x, y; + } components[4]; + } matrix; + } u; }; struct hlsl_ir_index @@ -934,16 +951,6 @@ struct hlsl_ir_stateblock_constant char *name; }; -/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. - * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ -struct hlsl_ir_vsir_instruction_ref -{ - struct hlsl_ir_node node; - - /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ - unsigned int vsir_instr_idx; -}; - struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. */ @@ -1259,12 +1266,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); } -static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) -{ - VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); - return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); -} - static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -1442,6 +1443,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); +void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, + struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f); void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, @@ -1519,6 +1522,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct 
hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, + enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); @@ -1550,8 +1555,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, + unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, @@ -1588,9 +1596,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct 
vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, - struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); - void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, @@ -1645,21 +1650,39 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context); D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); -D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler); -void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); -int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +struct extern_resource +{ + /* "var" is only not NULL if this resource is a whole variable, so it may + * be responsible for more than one component. 
*/ + const struct hlsl_ir_var *var; + const struct hlsl_buffer *buffer; + + char *name; + bool is_user_packed; + + /* The data type of a single component of the resource. This might be + * different from the data type of the resource itself in 4.0 profiles, + * where an array (or multi-dimensional array) is handled as a single + * resource, unlike in 5.0. */ + struct hlsl_type *component_type; + + enum hlsl_regset regset; + unsigned int id, space, index, bind_count; + + struct vkd3d_shader_location loc; +}; -int tpf_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); +struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count); +void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count); +void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef); enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 8dace11916a..31fb30521e9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -104,6 +104,7 @@ if {return KW_IF; } in {return KW_IN; } inline {return KW_INLINE; } inout {return KW_INOUT; } +LineStream {return KW_LINESTREAM; } linear {return KW_LINEAR; } matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } @@ -114,6 +115,7 @@ out {return KW_OUT; } packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } +PointStream {return KW_POINTSTREAM; } pixelshader {return KW_PIXELSHADER; } RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } @@ -170,6 +172,7 @@ texture3D {return KW_TEXTURE3D; } 
TextureCube {return KW_TEXTURECUBE; } textureCUBE {return KW_TEXTURECUBE; } TextureCubeArray {return KW_TEXTURECUBEARRAY; } +TriangleStream {return KW_TRIANGLESTREAM; } true {return KW_TRUE; } typedef {return KW_TYPEDEF; } unsigned {return KW_UNSIGNED; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 60aade732db..a3814a810b5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -475,7 +475,11 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo for (i = 0; i < arrays->count; ++i) { if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); + dst_type = ctx->builtin_types.error; + break; + } dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); } @@ -551,13 +555,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co return true; } -enum loop_type -{ - LOOP_FOR, - LOOP_WHILE, - LOOP_DO_WHILE -}; - static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) { unsigned int i, j; @@ -573,8 +570,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru } } -static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, - struct hlsl_block *cond, struct hlsl_block *iter) +static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_loop_type type, struct hlsl_block *cond) { struct hlsl_ir_node *instr, *next; @@ -584,8 +581,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - resolve_loop_continue(ctx, &iff->then_block, type, cond, iter); - resolve_loop_continue(ctx, &iff->else_block, type, cond, iter); + resolve_loop_continue(ctx, &iff->then_block, type, cond); + 
resolve_loop_continue(ctx, &iff->else_block, type, cond); } else if (instr->type == HLSL_IR_JUMP) { @@ -595,7 +592,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) continue; - if (type == LOOP_DO_WHILE) + if (type == HLSL_LOOP_DO_WHILE) { if (!hlsl_clone_block(ctx, &cond_block, cond)) return; @@ -606,13 +603,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block } list_move_before(&instr->entry, &cond_block.instrs); } - else if (type == LOOP_FOR) - { - if (!hlsl_clone_block(ctx, &cond_block, iter)) - return; - list_move_before(&instr->entry, &cond_block.instrs); - } - jump->type = HLSL_IR_JUMP_CONTINUE; } } } @@ -678,8 +668,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; - case HLSL_IR_VSIR_INSTRUCTION_REF: - vkd3d_unreachable(); } } @@ -738,11 +726,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str return res.number.u; } -static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { - enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; + enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL; unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; @@ -773,11 +761,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, hlsl_block_cleanup(&expr); } - unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; + unroll_type = HLSL_LOOP_FORCE_UNROLL; } else if (!strcmp(attr->name, "loop")) { - unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + unroll_type = 
HLSL_LOOP_FORCE_LOOP; } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) @@ -790,7 +778,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, } } - resolve_loop_continue(ctx, body, type, cond, iter); + resolve_loop_continue(ctx, body, type, cond); if (!init && !(init = make_empty_block(ctx))) goto oom; @@ -798,15 +786,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, if (!append_conditional_break(ctx, cond)) goto oom; - if (iter) - hlsl_block_add_block(body, iter); - - if (type == LOOP_DO_WHILE) + if (type == HLSL_LOOP_DO_WHILE) list_move_tail(&body->instrs, &cond->instrs); else list_move_head(&body->instrs, &cond->instrs); - if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) + if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc))) goto oom; hlsl_block_add_instr(init, loop); @@ -860,6 +845,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ + struct hlsl_matrix_swizzle s; bool m_swizzle; unsigned int inc, x, y; @@ -890,10 +876,11 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod if (x >= value->data_type->dimx || y >= value->data_type->dimy) return NULL; - swiz |= (y << 4 | x) << component * 8; + s.components[component].x = x; + s.components[component].y = y; component++; } - return hlsl_new_swizzle(ctx, swiz, component, value, loc); + return hlsl_new_matrix_swizzle(ctx, s, component, value, loc); } /* Vector swizzle */ @@ -922,8 +909,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod if (s >= value->data_type->dimx) return NULL; - swiz |= s << component * 2; - component++; + hlsl_swizzle_set_component(&swiz, component++, s); } if (valid) return hlsl_new_swizzle(ctx, swiz, component, value, loc); @@ -1192,6 +1178,8 @@ static bool 
gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in struct fields."); + field->type = ctx->builtin_types.error; + break; } field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); @@ -1282,6 +1270,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in typedefs."); + if (!(type = hlsl_type_clone(ctx, ctx->builtin_types.error, 0, 0))) + { + free_parse_variable_def(v); + ret = false; + } + break; } if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) @@ -2092,8 +2086,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { if (*writemask & (1 << i)) { - unsigned int s = (*swizzle >> (i * 2)) & 3; - new_swizzle |= s << (bit++ * 2); + unsigned int s = hlsl_swizzle_get_component(*swizzle, i); + hlsl_swizzle_set_component(&new_swizzle, bit++, s); if (new_writemask & (1 << s)) return false; new_writemask |= 1 << s; @@ -2107,9 +2101,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned { for (j = 0; j < width; ++j) { - unsigned int s = (new_swizzle >> (j * 2)) & 3; + unsigned int s = hlsl_swizzle_get_component(new_swizzle, j); if (s == i) - inverted |= j << (bit++ * 2); + hlsl_swizzle_set_component(&inverted, bit++, j); } } @@ -2119,22 +2113,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; } -static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) +static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle, + uint32_t *ret_inverted, unsigned int *writemask, unsigned int *ret_width) { - /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. - * components are indexed by their sources. i.e. 
the first component comes from the first - * component of the rhs. */ - unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0; + struct hlsl_matrix_swizzle new_swizzle = {0}; /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ for (i = 0; i < 4; ++i) { if (*writemask & (1 << i)) { - unsigned int s = (*swizzle >> (i * 8)) & 0xff; - unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int x = swizzle->components[i].x; + unsigned int y = swizzle->components[i].y; unsigned int idx = x + y * 4; - new_swizzle |= s << (bit++ * 8); + + new_swizzle.components[bit++] = swizzle->components[i]; if (new_writemask & (1 << idx)) return false; new_writemask |= 1 << idx; @@ -2142,22 +2136,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un } width = bit; - /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the - * incoming vector. */ + /* Then we invert the swizzle. The resulting swizzle uses a uint32_t + * vector format, because it's for the incoming vector. 
*/ bit = 0; for (i = 0; i < 16; ++i) { for (j = 0; j < width; ++j) { - unsigned int s = (new_swizzle >> (j * 8)) & 0xff; - unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int x = new_swizzle.components[j].x; + unsigned int y = new_swizzle.components[j].y; unsigned int idx = x + y * 4; if (idx == i) - inverted |= j << (bit++ * 2); + hlsl_swizzle_set_component(&inverted, bit++, j); } } - *swizzle = inverted; + *ret_inverted = inverted; *writemask = new_writemask; *ret_width = width; return true; @@ -2211,28 +2205,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; - uint32_t s = swizzle->swizzle; + uint32_t s; VKD3D_ASSERT(!matrix_writemask); if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) { + struct hlsl_matrix_swizzle ms = swizzle->u.matrix; + if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) { hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); return false; } - if (!invert_swizzle_matrix(&s, &writemask, &width)) + if (!invert_swizzle_matrix(&ms, &s, &writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); return false; } matrix_writemask = true; } - else if (!invert_swizzle(&s, &writemask, &width)) + else { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return false; + s = swizzle->u.vector; + if (!invert_swizzle(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); + return false; + } } if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) @@ -2670,26 +2670,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Only innermost array size can be implicit."); - 
v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else if (elem_components == 0) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Cannot declare an implicit size array of a size 0 type."); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else if (size == 0) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays need to be initialized."); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else if (size % elem_components != 0) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, "Cannot initialize implicit size array with %u components, expected a multiple of %u.", size, elem_components); - v->initializer.args_count = 0; + type = ctx->builtin_types.error; + break; } else { @@ -2908,7 +2912,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->initializer.args[0] = node_from_block(v->initializer.instrs); } - initialize_var(ctx, var, &v->initializer, is_default_values_initializer); + if (var->data_type->class != HLSL_CLASS_ERROR) + initialize_var(ctx, var, &v->initializer, is_default_values_initializer); if (is_default_values_initializer) { @@ -2993,13 +2998,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ return true; } +static enum hlsl_base_type hlsl_base_type_class(enum hlsl_base_type t) +{ + switch (t) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_DOUBLE: + return HLSL_TYPE_FLOAT; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return HLSL_TYPE_INT; + + case HLSL_TYPE_BOOL: + return HLSL_TYPE_BOOL; + } + + return 0; +} + +static unsigned int hlsl_base_type_width(enum hlsl_base_type t) +{ + switch (t) + { + case HLSL_TYPE_HALF: + return 16; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + return 32; + + case HLSL_TYPE_DOUBLE: + return 64; + } + + return 0; +} + +static 
int function_parameter_compare(const struct hlsl_ir_var *candidate, + const struct hlsl_ir_var *ref, const struct hlsl_ir_node *arg) +{ + struct + { + enum hlsl_base_type type; + enum hlsl_base_type class; + unsigned int count, width; + } c, r, a; + int ret; + + /* TODO: Non-numeric types. */ + if (!hlsl_is_numeric_type(arg->data_type)) + return 0; + + c.type = candidate->data_type->e.numeric.type; + c.class = hlsl_base_type_class(c.type); + c.count = hlsl_type_component_count(candidate->data_type); + c.width = hlsl_base_type_width(c.type); + + r.type = ref->data_type->e.numeric.type; + r.class = hlsl_base_type_class(r.type); + r.count = hlsl_type_component_count(ref->data_type); + r.width = hlsl_base_type_width(r.type); + + a.type = arg->data_type->e.numeric.type; + a.class = hlsl_base_type_class(a.type); + a.count = hlsl_type_component_count(arg->data_type); + a.width = hlsl_base_type_width(a.type); + + /* Prefer candidates without component count narrowing. E.g., given an + * float4 argument, half4 is a better match than float2. */ + if ((ret = (a.count > r.count) - (a.count > c.count))) + return ret; + + /* Prefer candidates with matching component type classes. E.g., given a + * float argument, double is a better match than int. */ + if ((ret = (a.class == c.class) - (a.class == r.class))) + return ret; + + /* Prefer candidates with matching component types. E.g., given an int + * argument, int4 is a better match than uint4. */ + if ((ret = (a.type == c.type) - (a.type == r.type))) + return ret; + + /* Prefer candidates without component type narrowing. E.g., given a float + * argument, double is a better match than half. */ + if ((ret = (a.width > r.width) - (a.width > c.width))) + return ret; + + /* Prefer candidates without component count widening. E.g. given a float + * argument, float is a better match than float2. 
*/ + return (a.count < r.count) - (a.count < c.count); +} + +static int function_compare(const struct hlsl_ir_function_decl *candidate, + const struct hlsl_ir_function_decl *ref, const struct parse_initializer *args) +{ + bool any_worse = false, any_better = false; + unsigned int i; + int ret; + + for (i = 0; i < args->args_count; ++i) + { + ret = function_parameter_compare(candidate->parameters.vars[i], ref->parameters.vars[i], args->args[i]); + if (ret < 0) + any_worse = true; + else if (ret > 0) + any_better = true; + } + + /* We consider a candidate better if at least one parameter is a better + * match, and none are a worse match. */ + return any_better - any_worse; +} + static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, const char *name, const struct parse_initializer *args, bool is_compile, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_function_decl *decl, *compatible_match = NULL; + struct hlsl_ir_function_decl *decl; + struct vkd3d_string_buffer *s; struct hlsl_ir_function *func; struct rb_entry *entry; + int compare; + size_t i; + struct + { + struct hlsl_ir_function_decl **candidates; + size_t count, capacity; + } candidates = {0}; if (!(entry = rb_get(&ctx->functions, name))) return NULL; @@ -3007,18 +3136,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { - if (func_is_compatible_match(ctx, decl, is_compile, args)) + if (!func_is_compatible_match(ctx, decl, is_compile, args)) + continue; + + if (candidates.count) { - if (compatible_match) + compare = function_compare(decl, candidates.candidates[0], args); + + /* The candidate is worse; skip it. */ + if (compare < 0) + continue; + + /* The candidate is better; replace the current candidates. 
*/ + if (compare > 0) { - hlsl_fixme(ctx, loc, "Prioritize between multiple compatible function overloads."); - break; + candidates.candidates[0] = decl; + candidates.count = 1; + continue; + } + } + + if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates, + &candidates.capacity, candidates.count + 1, sizeof(decl)))) + { + vkd3d_free(candidates.candidates); + return NULL; + } + candidates.candidates[candidates.count++] = decl; + } + + if (!candidates.count) + return NULL; + + if (candidates.count > 1) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL, "Ambiguous function call."); + if ((s = hlsl_get_string_buffer(ctx))) + { + hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, "Candidates are:"); + for (i = 0; i < candidates.count; ++i) + { + hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]); + hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer); + vkd3d_string_buffer_clear(s); } - compatible_match = decl; + hlsl_release_string_buffer(ctx, s); } } - return compatible_match; + decl = candidates.candidates[0]; + vkd3d_free(candidates.candidates); + + return decl; } static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) @@ -5447,6 +5616,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type struct hlsl_ir_load *load; struct hlsl_ir_var *var; + if (!hlsl_is_numeric_type(type)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Constructor data type %s is not numeric.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL; @@ -6553,6 +6733,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_semantic semantic; enum hlsl_buffer_type buffer_type; enum hlsl_sampler_dim sampler_dim; + enum 
hlsl_so_object_type so_type; struct hlsl_attribute *attr; struct parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; @@ -6596,6 +6777,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_INLINE %token KW_INOUT %token KW_LINEAR +%token KW_LINESTREAM %token KW_MATRIX %token KW_NAMESPACE %token KW_NOINTERPOLATION @@ -6605,6 +6787,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER +%token KW_POINTSTREAM %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D @@ -6654,6 +6837,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_TEXTURE3D %token KW_TEXTURECUBE %token KW_TEXTURECUBEARRAY +%token KW_TRIANGLESTREAM %token KW_TRUE %token KW_TYPEDEF %token KW_UNSIGNED @@ -6784,6 +6968,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type semantic +%type so_type + %type state_block %type state_block_index_opt @@ -7684,7 +7870,10 @@ parameter_decl: { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in function parameters."); + type = ctx->builtin_types.error; + break; } + type = hlsl_new_array_type(ctx, type, $4.sizes[i]); } vkd3d_free($4.sizes); @@ -7805,6 +7994,20 @@ rov_type: $$ = HLSL_SAMPLER_DIM_3D; } +so_type: + KW_POINTSTREAM + { + $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; + } + | KW_LINESTREAM + { + $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; + } + | KW_TRIANGLESTREAM + { + $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; + } + resource_format: var_modifiers type { @@ -7948,6 +8151,10 @@ type_no_void: validate_uav_type(ctx, $1, $3, &@4); $$ = hlsl_new_uav_type(ctx, $1, $3, true); } + | so_type '<' type '>' + { + $$ = hlsl_new_stream_output_type(ctx, $1, $3); + } | KW_RWBYTEADDRESSBUFFER { $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, 
hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); @@ -8088,14 +8295,9 @@ typedef: } if (modifiers) - { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Storage modifiers are not allowed on typedefs."); - LIST_FOR_EACH_ENTRY_SAFE(v, v_next, $4, struct parse_variable_def, entry) - vkd3d_free(v); - vkd3d_free($4); - YYABORT; - } + if (!add_typedef(ctx, type, $4)) YYABORT; } @@ -8753,25 +8955,25 @@ if_body: loop_statement: attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement { - $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); + $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' { - $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); + $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } @@ -8979,17 +9181,24 @@ primary_expr: struct hlsl_ir_load *load; struct hlsl_ir_var *var; - if (!(var = hlsl_get_var(ctx->cur_scope, $1))) + if ((var = hlsl_get_var(ctx->cur_scope, $1))) + { + vkd3d_free($1); + + if (!(load = hlsl_new_var_load(ctx, var, &@1))) + YYABORT; + if (!($$ = make_block(ctx, &load->node))) + YYABORT; + } + else { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, 
"Variable \"%s\" is not defined.", $1); vkd3d_free($1); - YYABORT; + + if (!($$ = make_empty_block(ctx))) + YYABORT; + $$->value = ctx->error_instr; } - vkd3d_free($1); - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; - if (!($$ = make_block(ctx, &load->node))) - YYABORT; } | '(' expr ')' { @@ -9149,23 +9358,8 @@ postfix_expr: | var_modifiers type '(' initializer_expr_list ')' { if ($1) - { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Modifiers are not allowed on constructors."); - free_parse_initializer(&$4); - YYABORT; - } - if (!hlsl_is_numeric_type($2)) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, $2))) - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Constructor data type %s is not numeric.", string->buffer); - hlsl_release_string_buffer(ctx, string); - free_parse_initializer(&$4); - YYABORT; - } if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { @@ -9233,11 +9427,8 @@ unary_expr: | '(' var_modifiers type arrays ')' unary_expr { if ($2) - { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Modifiers are not allowed on casts."); - YYABORT; - } if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) { @@ -9381,10 +9572,7 @@ assignment_expr: struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); - YYABORT; - } hlsl_block_add_block($3, $1); destroy_block($1); if (!add_assignment(ctx, $3, lhs, $2, rhs)) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index d11ff481f6b..a43ea53089e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -19,6 +19,7 @@ */ #include "hlsl.h" +#include "vkd3d_shader_private.h" #include #include @@ -1075,7 +1076,7 @@ static bool lower_matrix_swizzles(struct 
hlsl_ctx *ctx, struct hlsl_ir_node *ins struct hlsl_deref var_deref; struct hlsl_type *matrix_type; struct hlsl_ir_var *var; - unsigned int x, y, k, i; + unsigned int k, i; if (instr->type != HLSL_IR_SWIZZLE) return false; @@ -1093,9 +1094,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins struct hlsl_block store_block; struct hlsl_ir_node *load; - y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; - x = (swizzle->swizzle >> 8 * i) & 0xf; - k = y * matrix_type->dimx + x; + k = swizzle->u.matrix.components[i].y * matrix_type->dimx + swizzle->u.matrix.components[i].x; if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) return false; @@ -1358,8 +1357,10 @@ struct copy_propagation_var_def struct copy_propagation_state { - struct rb_tree var_defs; - struct copy_propagation_state *parent; + struct rb_tree *scope_var_defs; + size_t scope_count, scopes_capacity; + struct hlsl_ir_node *stop; + bool stopped; }; static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) @@ -1381,6 +1382,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte vkd3d_free(var_def); } +static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx) +{ + if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity, + state->scope_count + 1, sizeof(*state->scope_var_defs)))) + return false; + + rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare); + + return state->scope_count; +} + +static size_t copy_propagation_pop_scope(struct copy_propagation_state *state) +{ + rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL); + + return state->scope_count; +} + +static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx) +{ + memset(state, 0, sizeof(*state)); + + return 
copy_propagation_push_scope(state, ctx); +} + +static void copy_propagation_state_destroy(struct copy_propagation_state *state) +{ + while (copy_propagation_pop_scope(state)); + + vkd3d_free(state->scope_var_defs); +} + static struct copy_propagation_value *copy_propagation_get_value_at_time( struct copy_propagation_component_trace *trace, unsigned int time) { @@ -1398,9 +1431,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, const struct hlsl_ir_var *var, unsigned int component, unsigned int time) { - for (; state; state = state->parent) + for (size_t i = state->scope_count - 1; i < state->scope_count; i--) { - struct rb_entry *entry = rb_get(&state->var_defs, var); + struct rb_tree *tree = &state->scope_var_defs[i]; + struct rb_entry *entry = rb_get(tree, var); if (entry) { struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); @@ -1426,7 +1460,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_ir_var *var) { - struct rb_entry *entry = rb_get(&state->var_defs, var); + struct rb_tree *tree = &state->scope_var_defs[state->scope_count - 1]; + struct rb_entry *entry = rb_get(tree, var); struct copy_propagation_var_def *var_def; unsigned int component_count = hlsl_type_component_count(var->data_type); int res; @@ -1439,7 +1474,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h var_def->var = var; - res = rb_put(&state->var_defs, var, &var_def->entry); + res = rb_put(tree, var, &var_def->entry); VKD3D_ASSERT(!res); return var_def; @@ -1596,7 +1631,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, 
debug_hlsl_swizzle(swizzle, instr_component_count)); return false; } - ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i); + hlsl_swizzle_set_component(&ret_swizzle, i, value->component); } TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", @@ -1678,6 +1713,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; @@ -1719,10 +1755,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, return false; load = hlsl_ir_load(swizzle->val.node); - if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node)) return true; - if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node)) + if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) return true; return false; @@ -1818,18 +1854,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s } } -static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state, - struct copy_propagation_state *parent) -{ - rb_init(&state->var_defs, copy_propagation_var_def_compare); - state->parent = parent; -} - -static void copy_propagation_state_destroy(struct copy_propagation_state *state) -{ - rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL); -} - static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, struct hlsl_block *block, unsigned int time) { @@ -1898,16 +1922,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, struct copy_propagation_state 
*state) { - struct copy_propagation_state inner_state; bool progress = false; - copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &iff->then_block, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state); - copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &iff->else_block, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state); /* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for @@ -1922,14 +1949,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, struct copy_propagation_state *state) { - struct copy_propagation_state inner_state; bool progress = false; copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); + copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index); - copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &loop->body, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state); return progress; } @@ -1937,15 +1966,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l static bool 
copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, struct copy_propagation_state *state) { - struct copy_propagation_state inner_state; struct hlsl_ir_switch_case *c; bool progress = false; LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &c->body, &inner_state); - copy_propagation_state_destroy(&inner_state); + copy_propagation_push_scope(state, ctx); + progress |= copy_propagation_transform_block(ctx, &c->body, state); + if (state->stopped) + return progress; + copy_propagation_pop_scope(state); } LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) @@ -1964,6 +1994,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) { + if (instr == state->stop) + { + state->stopped = true; + return progress; + } + switch (instr->type) { case HLSL_IR_LOAD: @@ -2001,6 +2037,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b default: break; } + + if (state->stopped) + return progress; } return progress; @@ -2013,7 +2052,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc index_instructions(block, 2); - copy_propagation_state_init(ctx, &state, NULL); + copy_propagation_state_init(&state, ctx); progress = copy_propagation_transform_block(ctx, block, &state); @@ -2401,8 +2440,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *new_swizzle; uint32_t combined_swizzle; - combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, - swizzle->swizzle, instr->data_type->dimx); + combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, + swizzle->u.vector, instr->data_type->dimx); next_instr = hlsl_ir_swizzle(next_instr)->val.node; if 
(!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) @@ -2429,7 +2468,7 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return false; for (i = 0; i < instr->data_type->dimx; ++i) - if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i) + if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i) return false; hlsl_replace_node(instr, swizzle->val.node); @@ -2788,6 +2827,108 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n return true; } + +static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type) +{ + struct hlsl_type *sampler_type; + + if (type->class == HLSL_CLASS_ARRAY) + { + if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type))) + return NULL; + + return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count); + } + + return ctx->builtin_types.sampler[type->sampler_dim]; +} + +static bool deref_offset_is_zero(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); + unsigned int index; + + if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) + return false; + return index == 0; +} + +/* Lower samples from separate texture and sampler variables to samples from + * synthetized combined samplers. That is, translate SM4-style samples in the + * source to SM1-style samples in the bytecode. 
*/ +static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_var *var, *resource, *sampler; + struct hlsl_ir_resource_load *load; + struct vkd3d_string_buffer *name; + struct hlsl_type *sampler_type; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + + if (load->load_type != HLSL_RESOURCE_SAMPLE + && load->load_type != HLSL_RESOURCE_SAMPLE_LOD + && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) + return false; + + if (!load->sampler.var) + return false; + resource = load->resource.var; + sampler = load->sampler.var; + + VKD3D_ASSERT(hlsl_type_is_resource(resource->data_type)); + VKD3D_ASSERT(hlsl_type_is_resource(sampler->data_type)); + if (sampler->data_type->class == HLSL_CLASS_ARRAY && !deref_offset_is_zero(ctx, &load->sampler)) + { + /* Not supported by d3dcompiler. */ + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, + "Lower separated samples with sampler arrays."); + return false; + } + if (!resource->is_uniform) + return false; + if(!sampler->is_uniform) + return false; + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name); + + TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) + { + if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type))) + { + hlsl_release_string_buffer(ctx, name); + return false; + } + + if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, sampler_type, &instr->loc, false))) + { + hlsl_release_string_buffer(ctx, name); + return false; + } + var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + var->is_combined_sampler = true; + var->is_uniform = 1; + + list_remove(&var->scope_entry); + list_add_after(&sampler->scope_entry, &var->scope_entry); + + list_add_after(&sampler->extern_entry, 
&var->extern_entry); + } + hlsl_release_string_buffer(ctx, name); + + /* Only change the deref's var, keep the path. */ + load->resource.var = var; + hlsl_cleanup_deref(&load->sampler); + load->sampler.var = NULL; + + return true; +} + /* Lower combined samples and sampler variables to synthesized separated textures and samplers. * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -2899,6 +3040,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl list_add_tail(list, &to_add->extern_entry); } +static bool sort_synthetic_combined_samplers_first(struct hlsl_ctx *ctx) +{ + struct list separated_resources; + struct hlsl_ir_var *var, *next; + + list_init(&separated_resources); + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_combined_sampler) + { + list_remove(&var->extern_entry); + insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_SAMPLERS); + } + } + + list_move_head(&ctx->extern_vars, &separated_resources); + + return false; +} + static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) { struct list separated_resources; @@ -4162,9 +4324,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); - case HLSL_IR_VSIR_INSTRUCTION_REF: - /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ - vkd3d_unreachable(); } return false; @@ -4304,9 +4463,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. 
*/ vkd3d_unreachable(); - case HLSL_IR_VSIR_INSTRUCTION_REF: - /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ - vkd3d_unreachable(); case HLSL_IR_STORE: { @@ -4494,6 +4650,9 @@ struct register_allocator /* Two allocations with different mode can't share the same register. */ int mode; + /* If an allocation is VIP, no new allocations can be made in the + * register unless they are VIP as well. */ + bool vip; } *allocations; size_t count, capacity; @@ -4513,7 +4672,7 @@ struct register_allocator }; static unsigned int get_available_writemask(const struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) { unsigned int writemask = VKD3DSP_WRITEMASK_ALL; size_t i; @@ -4532,6 +4691,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all writemask &= ~allocation->writemask; if (allocation->mode != mode) writemask = 0; + if (allocation->vip && !vip) + writemask = 0; } if (!writemask) @@ -4542,7 +4703,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all } static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, - unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) + unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) { struct allocation *allocation; @@ -4556,16 +4717,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a allocation->first_write = first_write; allocation->last_read = last_read; allocation->mode = mode; + allocation->vip = vip; allocator->reg_count = max(allocator->reg_count, reg_idx + 1); } -/* reg_size is the number of register components to be reserved, while component_count is the number - * of components for the register's writemask. 
In SM1, floats and vectors allocate the whole - * register, even if they don't use it completely. */ +/* Allocates a register (or some components of it) within the register allocator. + * 'reg_size' is the number of register components to be reserved. + * 'component_count' is the number of components for the hlsl_reg's + * writemask, which can be smaller than 'reg_size'. For instance, sm1 + * floats and vectors allocate the whole register even if they are not + * using all components. + * 'mode' can be provided to avoid allocating on a register that already has an + * allocation with a different mode. + * 'force_align' can be used so that the allocation always start in '.x'. + * 'vip' can be used so that no new allocations can be made in the given register + * unless they are 'vip' as well. */ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size, - unsigned int component_count, int mode, bool force_align) + unsigned int component_count, int mode, bool force_align, bool vip) { struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; unsigned int required_size = force_align ? 
4 : reg_size; @@ -4579,7 +4749,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) { unsigned int available_writemask = get_available_writemask(allocator, - first_write, last_read, reg_idx, mode); + first_write, last_read, reg_idx, mode, vip); if (vkd3d_popcount(available_writemask) >= pref) { @@ -4589,7 +4759,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = reg_idx; ret.writemask = hlsl_combine_writemasks(writemask, vkd3d_write_mask_from_component_count(component_count)); - record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); + + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); return ret; } } @@ -4598,13 +4769,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a ret.id = allocator->reg_count; ret.writemask = vkd3d_write_mask_from_component_count(component_count); record_allocation(ctx, allocator, allocator->reg_count, - vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); + vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); return ret; } /* Allocate a register with writemask, while reserving reg_writemask. 
*/ -static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) +static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, + struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, + uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -4614,11 +4786,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct for (reg_idx = 0;; ++reg_idx) { if ((get_available_writemask(allocator, first_write, last_read, - reg_idx, mode) & reg_writemask) == reg_writemask) + reg_idx, mode, vip) & reg_writemask) == reg_writemask) break; } - record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); + record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); ret.id = reg_idx; ret.allocation_size = 1; @@ -4628,7 +4800,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct } static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, - unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) + unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) { unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; unsigned int writemask; @@ -4636,18 +4808,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig for (i = 0; i < (reg_size / 4); ++i) { - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip); if (writemask != VKD3DSP_WRITEMASK_ALL) return false; } - writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); 
+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); if ((writemask & last_reg_mask) != last_reg_mask) return false; return true; } static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) + unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) { struct hlsl_reg ret = {0}; uint32_t reg_idx; @@ -4655,15 +4827,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) + if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) break; } for (i = 0; i < reg_size / 4; ++i) - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); if (reg_size % 4) record_allocation(ctx, allocator, reg_idx + (reg_size / 4), - (1u << (reg_size % 4)) - 1, first_write, last_read, mode); + (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); ret.id = reg_idx; ret.allocation_size = align(reg_size, 4) / 4; @@ -4679,9 +4851,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); + return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); else - return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); + return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) @@ -4859,8 +5031,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, } if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, - instr->index, instr->last_read, reg_writemask, dst_writemask, 0); + instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, + instr->last_read, reg_writemask, dst_writemask, 0, false); else instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); @@ -5084,7 +5256,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, } } -static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) +static void sort_uniform_by_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort, enum hlsl_regset regset) { struct hlsl_ir_var *var; @@ -5092,8 +5264,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) { - uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; - uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; + uint32_t to_sort_size = to_sort->bind_count[regset]; + uint32_t var_size = var->bind_count[regset]; if (to_sort_size > var_size) { @@ -5105,7 +5277,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ list_add_tail(sorted, &to_sort->extern_entry); } -static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) 
+static void sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset regset) { struct list sorted = LIST_INIT(sorted); struct hlsl_ir_var *var, *next; @@ -5113,7 +5285,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform) - sort_uniform_by_numeric_bind_count(&sorted, var); + sort_uniform_by_bind_count(&sorted, var, regset); } list_move_tail(&ctx->extern_vars, &sorted); } @@ -5161,7 +5333,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var; - sort_uniforms_by_numeric_bind_count(ctx); + sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC); LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -5181,14 +5353,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (i < bind_count) { - if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) + if (get_available_writemask(&allocator_used, 1, UINT_MAX, + reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Overlapping register() reservations on 'c%u'.", reg_idx + i); } - record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); } var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; @@ -5211,7 +5384,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (!var->regs[HLSL_REGSET_NUMERIC].allocated) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, 
&allocator, 1, UINT_MAX, alloc_size, 0); + var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } @@ -5254,7 +5427,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { - record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, + var->first_write, var->last_read, 0, false); break; } } @@ -5311,6 +5485,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_register_type type; struct vkd3d_shader_version version; + bool special_interpolation = false; + bool vip_allocation = false; uint32_t reg; bool builtin; @@ -5363,6 +5539,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var * domains, it is allocated as if it was 'float[1]'. */ var->force_align = true; } + + if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX + || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX + || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) + vip_allocation = true; + + if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) + special_interpolation = true; } if (builtin) @@ -5376,8 +5560,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); unsigned int reg_size = optimize ? 
var->data_type->dimx : 4; - var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, - UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); + if (special_interpolation) + mode = VKD3DSIM_NONE; + + var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, + reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); @@ -6419,6 +6606,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); progress |= hlsl_copy_propagation_execute(ctx, body); progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); @@ -6636,7 +6824,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d swizzle = hlsl_swizzle_from_writemask(src_writemask); swizzle = hlsl_map_swizzle(swizzle, dst_writemask); - swizzle = vsir_swizzle_from_hlsl(swizzle); return swizzle; } @@ -6812,7 +6999,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src } static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, - struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) { struct hlsl_ir_constant *constant; @@ -6832,6 +7019,242 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, } } +static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) +{ + 
const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref; + + reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; + + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); + reg->idx[0].offset += offset_const_deref / 4; + reg->idx_count = 1; + } + else + { + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; + + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; + + if (!(idx_src = vsir_program_get_src_params(program, 1))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return false; + } + memset(idx_src, 0, sizeof(*idx_src)); + reg->idx[1].rel_addr = idx_src; + + vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); + } + } + + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + return true; +} + +static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) + { + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); + + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ + reg->idx_count = 2; + 
} + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ + reg->idx[2].offset = offset / 4; + reg->idx_count = 3; + } + else + { + reg->idx[0].offset = var->buffer->reg.index; + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } + *writemask 
= ((1u << data_type->dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { + bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); + + if (version->type == VKD3D_SHADER_TYPE_DOMAIN) + reg->type = VKD3DSPR_PATCHCONST; + else + reg->type = VKD3DSPR_INPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else if (var->is_output_semantic) + { + bool has_idx; + + if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); + reg->type = VKD3DSPR_OUTPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else + { + return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); + } + return true; +} + +static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + 
struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, + unsigned int dst_writemask, const struct vkd3d_shader_location *loc) +{ + uint32_t writemask; + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) + return false; + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + return true; +} + +static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, + struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc, unsigned int writemask) +{ + uint32_t reg_writemask; + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, ®_writemask, deref)) + return false; + dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); + return true; +} + static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) { @@ -7059,13 +7482,10 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_INT: case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; - case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); - break; - case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); break; @@ -7472,9 +7892,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, dst_param->write_mask = instr->reg.writemask; swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); - swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); + swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->dimx); swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); - swizzle = vsir_swizzle_from_hlsl(swizzle); src_param = &ins->src[0]; VKD3D_ASSERT(val->type != 
HLSL_IR_CONSTANT); @@ -7624,31 +8043,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo } static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) + uint64_t config_flags, struct vsir_program *program) { struct vkd3d_shader_version version = {0}; - struct vkd3d_bytecode_buffer buffer = {0}; struct hlsl_block block; version.major = ctx->profile->major_version; version.minor = ctx->profile->minor_version; version.type = ctx->profile->type; - if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) { ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; } - write_sm1_uniforms(ctx, &buffer); - if (buffer.status) - { - vkd3d_free(buffer.data); - ctx->result = buffer.status; - return; - } - ctab->code = buffer.data; - ctab->size = buffer.size; - generate_vsir_signature(ctx, program, entry_func); hlsl_block_init(&block); @@ -7659,38 +8067,401 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); } -static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) +D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) { - struct vkd3d_shader_location *loc; - struct hlsl_ir_node *vsir_instr; - - loc = &program->instructions.elements[program->instructions.count - 1].location; - - if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) + switch (type->class) { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - hlsl_block_add_instr(block, vsir_instr); + case HLSL_CLASS_ARRAY: + return hlsl_sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + VKD3D_ASSERT(type->modifiers & 
HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPC_OBJECT; + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } + + vkd3d_unreachable(); } -static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_node *instr) +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler) { - struct vkd3d_shader_location *loc; - struct hlsl_ir_node *vsir_instr; + enum hlsl_type_class class = type->class; - loc = &program->instructions.elements[program->instructions.count - 1].location; + if (is_combined_sampler) + class = HLSL_CLASS_TEXTURE; - if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, - program->instructions.count - 1, instr->data_type, &instr->reg, loc))) + switch (class) { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + /* Actually double behaves differently depending on DLL version: + * For <= 36, it maps to 
D3DXPT_FLOAT. + * For 37-40, it maps to zero (D3DXPT_VOID). + * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* + * values are mostly compatible with D3DXPT_*). + * However, the latter two cases look like bugs, and a reasonable + * application certainly wouldn't know what to do with them. + * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ + case HLSL_TYPE_DOUBLE: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + default: + vkd3d_unreachable(); + } + + case HLSL_CLASS_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + + case HLSL_CLASS_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + + case HLSL_CLASS_ARRAY: + return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler); + + case HLSL_CLASS_STRUCT: + return D3DXPT_VOID; + + case HLSL_CLASS_STRING: + return D3DXPT_STRING; + + case HLSL_CLASS_PIXEL_SHADER: + return D3DXPT_PIXELSHADER; + + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPT_VERTEXSHADER; + + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_ERROR: + case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: + case 
HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } + + vkd3d_unreachable(); +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, + struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + unsigned int array_size = hlsl_get_multiarray_size(type); + struct hlsl_struct_field *field; + unsigned int field_count = 0; + size_t fields_offset = 0; + size_t i; + + if (type->bytecode_offset) return; + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + field = &array_type->e.record.fields[i]; + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, false, ctab_start); + } + + fields_offset = bytecode_align(buffer) - ctab_start; + + for (i = 0; i < field_count; ++i) + { + field = &array_type->e.record.fields[i]; + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } + } + + type->bytecode_offset = put_u32(buffer, + vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + 
list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sm1_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + size_t ctab_start, vars_offset, vars_start, creator_offset, offset; + unsigned int uniform_count = 0, r; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + char *new_name; + + if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) + return; + vkd3d_free((char *)var->name); + var->name = new_name; + } + } + } + + sm1_sort_externs(ctx); + + ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ + creator_offset = put_u32(buffer, 0); + if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + put_u32(buffer, D3DVS_VERSION(ctx->profile->major_version, ctx->profile->minor_version)); + else + put_u32(buffer, D3DPS_VERSION(ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + vars_offset = put_u32(buffer, 0); + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_align(buffer); + set_u32(buffer, vars_offset, vars_start - ctab_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); + put_u32(buffer, var->bind_count[r]); + } + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); + put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + size_t var_offset, name_offset; + + if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + 
unsigned int k; + + default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); + set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset; + enum hlsl_regset regset; + + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); + if (regset == HLSL_REGSET_NUMERIC) + { + union + { + uint32_t u; + float f; + } uni; + + switch (comp_type->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + if (ctx->double_as_float_alias) + uni.u = var->default_values[k].number.u; + else + uni.u = 0; + break; + + case HLSL_TYPE_INT: + uni.f = var->default_values[k].number.i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + uni.f = var->default_values[k].number.u; + break; + + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + uni.u = var->default_values[k].number.u; + break; + + default: + vkd3d_unreachable(); + } + + set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); + } + } + } + + ++uniform_count; + } } - list_add_before(&instr->entry, &vsir_instr->entry); - hlsl_replace_node(instr, vsir_instr); + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); +} + +static void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + + write_sm1_uniforms(ctx, &buffer); + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + ctab->code = buffer.data; + ctab->size = buffer.size; } static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -7806,8 +8577,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) ins->flags = 
sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - - add_last_vsir_instr_to_block(ctx, program, block); } static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -7819,8 +8588,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ return; ins->declaration.count = temp_count; - - add_last_vsir_instr_to_block(ctx, program, block); } static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, @@ -7838,8 +8605,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; ins->declaration.indexable_temp.component_count = comp_count; ins->declaration.indexable_temp.has_function_scope = false; - - add_last_vsir_instr_to_block(ctx, program, block); } static bool type_is_float(const struct hlsl_type *type) @@ -8505,59 +9270,690 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, } } -static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_store *store) { - struct vkd3d_string_buffer *dst_type_string; - struct hlsl_ir_node *instr, *next; - struct hlsl_ir_switch_case *c; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; - LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } - } + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return false; - switch 
(instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); + dst_param = &ins->dst[0]; + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + dst_param, &store->lhs, &instr->loc, store->writemask)) + return false; - case HLSL_IR_CONSTANT: - /* In SM4 all constants are inlined. */ - break; + src_param = &ins->src[0]; + vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); - case HLSL_IR_EXPR: - if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) - break; + return true; +} - if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) - replace_instr_with_last_vsir_instr(ctx, program, instr); +/* Does this variable's data come directly from the API user, rather than + * being temporary or from a previous shader stage? I.e. is it a uniform or + * VS input? */ +static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) +{ + if (var->is_uniform) + return true; - hlsl_release_string_buffer(ctx, dst_type_string); - break; + return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; +} - case HLSL_IR_IF: - sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); - sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); - break; +static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_type *type = load->node.data_type; + struct vkd3d_shader_dst_param *dst_param; + struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + struct hlsl_constant_value value; - case HLSL_IR_LOOP: - sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) + { + /* Uniform bools can be 
specified as anything, but internal bools + * always have 0 for false and ~0 for true. Normalise that here. */ + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) + return false; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; + + memset(&value, 0xff, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, + VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); + memset(&value, 0x00, sizeof(value)); + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, + VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); + } + else + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return false; + + dst_param = &ins->dst[0]; + vsir_dst_from_hlsl_node(dst_param, ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) + return false; + } + return true; +} + +static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_resource_store *store) +{ + struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); + struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; + struct hlsl_ir_node *instr = &store->node; + struct vkd3d_shader_instruction *ins; + unsigned int writemask; + + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return false; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); + return false; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + if (!(ins = 
generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) + return false; + + writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, writemask)) + return false; + } + else + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) + return false; + + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, + &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) + return false; + } + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); + + return true; +} + +static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset) +{ + struct hlsl_ir_constant *offset; + + VKD3D_ASSERT(texel_offset); + if (texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) + return false; + if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) + return false; + if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) + return false; + return true; +} + +static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi( + struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset) +{ + struct hlsl_ir_constant *offset; + + if (!texel_offset) + return; + offset = hlsl_ir_constant(texel_offset); + + ins->texel_offset.u = offset->value.u[0].i; + ins->texel_offset.v = 0; + ins->texel_offset.w = 0; + if (offset->node.data_type->dimx > 1) + ins->texel_offset.v = offset->value.u[1].i; + if (offset->node.data_type->dimx > 2) + ins->texel_offset.w = offset->value.u[2].i; +} + +static bool sm4_generate_vsir_instr_ld(struct 
hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); + bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &program->shader_version; + bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + enum hlsl_sampler_dim dim = load->sampling_dim; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + bool multisampled; + + VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); + + multisampled = resource_type->class == HLSL_CLASS_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + + if (uav) + opcode = VKD3DSIH_LD_UAV_TYPED; + else if (raw) + opcode = VKD3DSIH_LD_RAW; + else + opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) + return false; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return false; + } + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in + * the W component in the instruction. 
*/ + unsigned int dim_count = hlsl_sampler_dim_count(dim); + + if (dim_count == 1) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; + if (dim_count == 2) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; + } + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (multisampled) + { + if (sample_index->type == HLSL_IR_CONSTANT) + vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, + &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0); + else if (version->major == 4 && version->minor == 0) + hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + else + vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL); + } + return true; +} + +static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + unsigned int src_count; + + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + opcode = VKD3DSIH_SAMPLE; + src_count = 3; + break; + + case HLSL_RESOURCE_SAMPLE_CMP: + opcode = VKD3DSIH_SAMPLE_C; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + opcode = VKD3DSIH_SAMPLE_C_LZ; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_LOD: + opcode = VKD3DSIH_SAMPLE_LOD; + src_count = 4; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + opcode = VKD3DSIH_SAMPLE_B; + src_count = 4; + break; + + case 
HLSL_RESOURCE_SAMPLE_GRAD: + opcode = VKD3DSIH_SAMPLE_GRAD; + src_count = 5; + break; + + default: + vkd3d_unreachable(); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) + return false; + + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return false; + } + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], + resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2], + sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + + if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); + } + else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL); + } + else if (opcode == VKD3DSIH_SAMPLE_GRAD) + { + vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL); + } + return true; +} + +static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, + const struct hlsl_ir_resource_load *load, uint32_t swizzle) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + 
const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *instr = &load->node; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; + + opcode = VKD3DSIH_GATHER4; + if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) + { + if (!vkd3d_shader_ver_ge(version, 5, 0)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return false; + } + opcode = VKD3DSIH_GATHER4_PO; + } + + if (opcode == VKD3DSIH_GATHER4) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) + return false; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[2].swizzle = swizzle; + } + else if (opcode == VKD3DSIH_GATHER4_PO) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) + return false; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) + return false; + ins->src[3].reg.dimension = 
VSIR_DIMENSION_VEC4; + ins->src[3].swizzle = swizzle; + } + else + { + vkd3d_unreachable(); + } + return true; +} + +static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) + return false; + + if (type->e.numeric.type == HLSL_TYPE_UINT) + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + return true; +} + +static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *instr = &load->node; + struct hlsl_type *type = instr->data_type; + struct vkd3d_shader_instruction *ins; + + if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER + || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers."); + return false; + } + + VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2))) + return false; + + if (type->e.numeric.type == HLSL_TYPE_UINT) + ins->flags = VKD3DSI_RESINFO_UINT; + + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, 
VKD3DSP_WRITEMASK_ALL); + + if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, + &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) + return false; + + return true; +} + +static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_resource_load *load) +{ + if (load->sampler.var && !load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return false; + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return false; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + return sm4_generate_vsir_instr_ld(ctx, program, load); + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + /* Combined sample expressions were lowered. 
*/ + VKD3D_ASSERT(load->sampler.var); + return sm4_generate_vsir_instr_sample(ctx, program, load); + + case HLSL_RESOURCE_GATHER_RED: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); + + case HLSL_RESOURCE_GATHER_GREEN: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); + + case HLSL_RESOURCE_GATHER_BLUE: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); + + case HLSL_RESOURCE_GATHER_ALPHA: + return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); + + case HLSL_RESOURCE_SAMPLE_INFO: + return sm4_generate_vsir_instr_sample_info(ctx, program, load); + + case HLSL_RESOURCE_RESINFO: + return sm4_generate_vsir_instr_resinfo(ctx, program, load); + + case HLSL_RESOURCE_SAMPLE_PROJ: + vkd3d_unreachable(); + + default: + return false; + } +} + +static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_jump *jump) +{ + const struct hlsl_ir_node *instr = &jump->node; + struct vkd3d_shader_instruction *ins; + + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0); + + case HLSL_IR_JUMP_CONTINUE: + return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0); + + case HLSL_IR_JUMP_DISCARD_NZ: + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1))) + return false; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL); + return true; + + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return false; + } +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, 
struct vsir_program *program); + +static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) +{ + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) + return; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL); + + sm4_generate_vsir_block(ctx, &iff->then_block, program); + + if (!list_empty(&iff->else_block.instrs)) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) + return; + sm4_generate_vsir_block(ctx, &iff->else_block, program); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) + return; +} + +static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_loop *loop) +{ + struct hlsl_ir_node *instr = &loop->node; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0))) + return; + + sm4_generate_vsir_block(ctx, &loop->body, program); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0))) + return; +} + +static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_switch *swi) +{ + const struct hlsl_ir_node *selector = swi->selector.node; + struct hlsl_ir_node *instr = &swi->node; + struct vkd3d_shader_instruction *ins; + struct hlsl_ir_switch_case *cas; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1))) + return; + vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL); + + LIST_FOR_EACH_ENTRY(cas, &swi->cases, 
struct hlsl_ir_switch_case, entry) + { + if (cas->is_default) + { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0))) + return; + } + else + { + struct hlsl_constant_value value = {.u[0].u = cas->value}; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1))) + return; + vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL); + } + + sm4_generate_vsir_block(ctx, &cas->body, program); + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0))) + return; +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct vkd3d_string_buffer *dst_type_string; + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + /* In SM4 all constants are inlined. 
*/ + break; + + case HLSL_IR_EXPR: + if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) + break; + sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer); + hlsl_release_string_buffer(ctx, dst_type_string); + break; + + case HLSL_IR_IF: + sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); + break; + + case HLSL_IR_LOAD: + sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); + break; + + case HLSL_IR_LOOP: + sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: + sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_JUMP: + sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_STORE: + sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); break; case HLSL_IR_SWITCH: - LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) - sm4_generate_vsir_block(ctx, &c->body, program); + sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); break; case HLSL_IR_SWIZZLE: generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); - replace_instr_with_last_vsir_instr(ctx, program, instr); break; default: @@ -8582,42 +9978,330 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, return; program->temp_count = max(program->temp_count, temp_count); - hlsl_block_init(&block); + hlsl_block_init(&block); + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) + || (var->is_output_semantic && var->first_write)) + sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); + } + + if (temp_count) + sm4_generate_vsir_instr_dcl_temps(ctx, program, 
temp_count, &block, &func->loc); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + continue; + + if (var->indexable) + { + unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; + unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + + sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); + } + } + } + + list_move_head(&func->body.instrs, &block.instrs); + + hlsl_block_cleanup(&block); + + sm4_generate_vsir_block(ctx, &func->body, program); + + generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); +} + +static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program) +{ + struct extern_resource *extern_resources; + unsigned int extern_resources_count; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { + if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) + program->features.rovs = true; + } + sm4_free_extern_resources(extern_resources, extern_resources_count); + + /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, + * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ +} + +static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_function_decl *entry_func) +{ + const struct vkd3d_shader_version *version = &program->shader_version; + struct extern_resource *extern_resources; + unsigned int extern_resources_count, i; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + if (version->major == 4) + { + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + const struct hlsl_type *type = resource->component_type; + + if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { + program->global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; + break; + } + } + } + + sm4_free_extern_resources(extern_resources, extern_resources_count); + + if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) + program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; +} + +static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_buffer *cbuffer) +{ + unsigned int array_first = cbuffer->reg.index; + unsigned int array_last = cbuffer->reg.index; /* FIXME: array end. 
*/ + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &cbuffer->loc, VKD3DSIH_DCL_CONSTANT_BUFFER, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + ins->declaration.cb.size = cbuffer->size; + + src_param = &ins->declaration.cb.src; + vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 0); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + + ins->declaration.cb.range.space = cbuffer->reg.space; + ins->declaration.cb.range.first = array_first; + ins->declaration.cb.range.last = array_last; + + src_param->reg.idx[0].offset = cbuffer->reg.id; + src_param->reg.idx[1].offset = array_first; + src_param->reg.idx[2].offset = array_last; + src_param->reg.idx_count = 3; +} + +static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct extern_resource *resource) +{ + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i; + + VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); + VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); + + for (i = 0; i < resource->bind_count; ++i) + { + unsigned int array_first = resource->index + i; + unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ + + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, VKD3DSIH_DCL_SAMPLER, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + ins->flags |= VKD3DSI_SAMPLER_COMPARISON_MODE; + + src_param = &ins->declaration.sampler.src; + vsir_src_param_init(src_param, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 0); + + ins->declaration.sampler.range.first = array_first; + ins->declaration.sampler.range.last = array_last; + ins->declaration.sampler.range.space = resource->space; + + src_param->reg.idx[0].offset = resource->id; + src_param->reg.idx[1].offset = array_first; + src_param->reg.idx[2].offset = array_last; + src_param->reg.idx_count = 3; + } +} + +static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return VKD3D_SHADER_RESOURCE_TEXTURE_1D; + case HLSL_SAMPLER_DIM_2D: + return VKD3D_SHADER_RESOURCE_TEXTURE_2D; + case HLSL_SAMPLER_DIM_3D: + return VKD3D_SHADER_RESOURCE_TEXTURE_3D; + case HLSL_SAMPLER_DIM_CUBE: + return VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return VKD3D_SHADER_RESOURCE_TEXTURE_2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return VKD3D_SHADER_RESOURCE_BUFFER; + default: + vkd3d_unreachable(); + } +} + +static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_type *type) +{ + 
const struct hlsl_type *format = type->e.resource.format; + + switch (format->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + return VKD3D_DATA_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (format->modifiers & HLSL_MODIFIER_UNORM) + return VKD3D_DATA_UNORM; + if (format->modifiers & HLSL_MODIFIER_SNORM) + return VKD3D_DATA_SNORM; + return VKD3D_DATA_FLOAT; + + case HLSL_TYPE_INT: + return VKD3D_DATA_INT; + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + return VKD3D_DATA_UINT; + + default: + vkd3d_unreachable(); + } +} + +static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct extern_resource *resource, + bool uav) +{ + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + struct vkd3d_shader_structured_resource *structured_resource; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_semantic *semantic; + struct vkd3d_shader_instruction *ins; + struct hlsl_type *component_type; + enum vkd3d_shader_opcode opcode; + bool multisampled; + unsigned int i, j; + + VKD3D_ASSERT(resource->regset == regset); + VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); - LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + component_type = resource->component_type; + + for (i = 0; i < resource->bind_count; ++i) { - if ((var->is_input_semantic && var->last_read) - || (var->is_output_semantic && var->first_write)) - sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); - } + unsigned int array_first = resource->index + i; + unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ - if (temp_count) - sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + if (resource->var && !resource->var->objects_usage[regset][i].used) + continue; - LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) - { - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + if (uav) { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) - continue; - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - continue; - - if (var->indexable) + switch (component_type->sampler_dim) { - unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; - unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; - - sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + opcode = VKD3DSIH_DCL_UAV_STRUCTURED; + break; + case HLSL_SAMPLER_DIM_RAW_BUFFER: + opcode = VKD3DSIH_DCL_UAV_RAW; + break; + default: + opcode = VKD3DSIH_DCL_UAV_TYPED; + break; + } + } + else + { + switch (component_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_RAW_BUFFER: + opcode = VKD3DSIH_DCL_RESOURCE_RAW; + break; + default: + opcode = VKD3DSIH_DCL; + break; } } - } - list_move_head(&func->body.instrs, &block.instrs); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + semantic = &ins->declaration.semantic; + structured_resource = &ins->declaration.structured_resource; + dst_param = &semantic->resource.reg; + vsir_dst_param_init(dst_param, uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0); - hlsl_block_cleanup(&block); + if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; + if (uav && component_type->e.resource.rasteriser_ordered) + ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW; - sm4_generate_vsir_block(ctx, &func->body, program); + multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + + if (!hlsl_version_ge(ctx, 4, 1) && multisampled && !component_type->sample_count) + { + hlsl_error(ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %u.%u.", + ctx->profile->major_version, ctx->profile->minor_version); + } + + for (j = 0; j < 4; ++j) + semantic->resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type); + + semantic->resource.range.first = array_first; + semantic->resource.range.last = array_last; + semantic->resource.range.space = resource->space; + + dst_param->reg.idx[0].offset = resource->id; + dst_param->reg.idx[1].offset = array_first; + dst_param->reg.idx[2].offset = array_last; + dst_param->reg.idx_count = 3; + + ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type); + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + ins->raw = true; + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + ins->structured = true; + ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; + } + + if (multisampled) + semantic->sample_count = component_type->sample_count; + } } /* OBJECTIVE: Translate all the information from ctx and entry_func to the @@ -8627,12 +10311,15 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl uint64_t config_flags, 
struct vsir_program *program) { struct vkd3d_shader_version version = {0}; + struct extern_resource *extern_resources; + unsigned int extern_resources_count; + const struct hlsl_buffer *cbuffer; version.major = ctx->profile->major_version; version.minor = ctx->profile->minor_version; version.type = ctx->profile->type; - if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) { ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; return; @@ -8648,45 +10335,178 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl program->thread_group_size.y = ctx->thread_count[1]; program->thread_group_size.z = ctx->thread_count[2]; } + else if (version.type == VKD3D_SHADER_TYPE_HULL) + { + program->input_control_point_count = 1; /* TODO: Obtain from InputPatch */ + program->output_control_point_count = ctx->output_control_point_count; + program->tess_domain = ctx->domain; + program->tess_partitioning = ctx->partitioning; + program->tess_output_primitive = ctx->output_primitive; + } + else if (version.type == VKD3D_SHADER_TYPE_DOMAIN) + { + program->input_control_point_count = 0; /* TODO: Obtain from OutputPatch */ + program->tess_domain = ctx->domain; + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + sm4_generate_vsir_add_dcl_constant_buffer(ctx, program, cbuffer); + } + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + + if (resource->regset == HLSL_REGSET_SAMPLERS) + sm4_generate_vsir_add_dcl_sampler(ctx, program, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) + sm4_generate_vsir_add_dcl_texture(ctx, program, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) + 
sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true); + } + sm4_free_extern_resources(extern_resources, extern_resources_count); + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); sm4_generate_vsir_add_function(ctx, func, config_flags, program); if (version.type == VKD3D_SHADER_TYPE_HULL) + { + generate_vsir_add_program_instruction(ctx, program, + &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); + } + + generate_vsir_scan_required_features(ctx, program); + generate_vsir_scan_global_flags(ctx, program, func); } -static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, - struct hlsl_block **found_block) +static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *node; + struct hlsl_ir_node *const_node, *store; - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + if (!(const_node = hlsl_new_bool_constant(ctx, val, loc))) + return false; + hlsl_block_add_instr(block, const_node); + + if (!(store = hlsl_new_simple_store(ctx, var, const_node))) + return false; + hlsl_block_add_instr(block, store); + + return true; +} + +static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued); + +static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) +{ + struct hlsl_ir_jump *jump; + struct hlsl_ir_var *var; + struct hlsl_block draft; + struct hlsl_ir_if *iff; + + if (node->type == HLSL_IR_IF) { - if (node == stop_point) - return 
NULL; + iff = hlsl_ir_if(node); + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued)) + return true; + if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued)) + return true; + return false; + } - if (node->type == HLSL_IR_IF) - { - struct hlsl_ir_if *iff = hlsl_ir_if(node); - struct hlsl_ir_jump *jump = NULL; + if (node->type == HLSL_IR_JUMP) + { + jump = hlsl_ir_jump(node); + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK) + return false; - if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) - return jump; - if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) - return jump; - } - else if (node->type == HLSL_IR_JUMP) - { - struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + hlsl_block_init(&draft); - if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) - { - *found_block = block; - return jump; - } - } + if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + var = loop_continued; + else + var = loop_broken; + + if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc)) + return false; + + list_move_before(&jump->node.entry, &draft.instrs); + list_remove(&jump->node.entry); + hlsl_free_instr(&jump->node); + + return true; } - return NULL; + return false; +} + +static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, + struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *cond, *iff; + struct hlsl_block then_block; + struct hlsl_ir_load *load; + + hlsl_block_init(&then_block); + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; + hlsl_block_add_instr(dst, &load->node); + + if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc))) + return NULL; + hlsl_block_add_instr(dst, cond); + + if (!(iff = hlsl_new_if(ctx, cond, &then_block, 
NULL, loc))) + return NULL; + hlsl_block_add_instr(dst, iff); + + return hlsl_ir_if(iff); +} + +static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) +{ + struct hlsl_ir_node *node, *next; + + LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry) + { + struct hlsl_ir_if *broken_check, *continued_check; + struct hlsl_block draft; + + if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued)) + continue; + + if (&next->entry == &block->instrs) + return true; + + hlsl_block_init(&draft); + + broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc); + continued_check = loop_unrolling_generate_var_check(ctx, + &broken_check->then_block, loop_continued, &next->loc); + + list_move_before(&next->entry, &draft.instrs); + + list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs)); + + return true; + } + + return false; +} + +static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) +{ + while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued)); } static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) @@ -8696,7 +10516,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return loop->unroll_limit; /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) return 1024; /* SM4 limits implicit unrolling to 254 iterations. 
*/ @@ -8707,167 +10527,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru return 1024; } -static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct copy_propagation_state *state, unsigned int *index) +{ + size_t scopes_depth = state->scope_count - 1; + unsigned int current_index; + bool progress; + + do + { + state->stopped = false; + for (size_t i = state->scope_count; scopes_depth < i; --i) + copy_propagation_pop_scope(state); + copy_propagation_push_scope(state, ctx); + + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); + + current_index = index_instructions(block, *index); + progress |= copy_propagation_transform_block(ctx, block, state); + + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL); + } while (progress); + + *index = current_index; +} + +static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var) +{ + struct copy_propagation_value *v; + + if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX)) + || v->node->type != HLSL_IR_CONSTANT) + return false; + + return hlsl_ir_constant(v->node)->value.u[0].u; +} + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop) { - unsigned int max_iterations, i; + struct hlsl_block draft, tmp_dst, loop_body; + struct hlsl_ir_var *broken, *continued; + unsigned int max_iterations, i, index; + struct copy_propagation_state state; + 
struct hlsl_ir_if *target_if; + + if (!(broken = hlsl_new_synthetic_var(ctx, "broken", + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) + goto fail; + + if (!(continued = hlsl_new_synthetic_var(ctx, "continued", + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) + goto fail; + + hlsl_block_init(&draft); + hlsl_block_init(&tmp_dst); max_iterations = loop_unrolling_get_max_iterations(ctx, loop); + copy_propagation_state_init(&state, ctx); + index = 2; + state.stop = &loop->node; + loop_unrolling_simplify(ctx, block, &state, &index); + state.stopped = false; + index = loop->node.index; + + if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc)) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); + + if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); + + if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) + goto fail; + state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry); + hlsl_block_add_block(&draft, &tmp_dst); + + copy_propagation_push_scope(&state, ctx); + loop_unrolling_simplify(ctx, &draft, &state, &index); + + /* As an optimization, we only remove jumps from the loop's body once. 
*/ + if (!hlsl_clone_block(ctx, &loop_body, &loop->body)) + goto fail; + loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued); for (i = 0; i < max_iterations; ++i) { - struct hlsl_block tmp_dst, *jump_block; - struct hlsl_ir_jump *jump = NULL; + copy_propagation_push_scope(&state, ctx); - if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) - return false; - list_move_before(&loop->node.entry, &tmp_dst.instrs); - hlsl_block_cleanup(&tmp_dst); + if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) + goto fail; + hlsl_block_add_block(&target_if->then_block, &tmp_dst); - hlsl_run_const_passes(ctx, block); + if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body)) + goto fail; + hlsl_block_add_block(&target_if->then_block, &tmp_dst); - if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) - { - enum hlsl_ir_jump_type type = jump->type; + loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index); - if (jump_block != loop_parent) - { - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) - hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, - "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); - return false; - } + if (loop_unrolling_check_val(&state, broken)) + break; - list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); - hlsl_block_cleanup(&tmp_dst); + if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); - if (type == HLSL_IR_JUMP_BREAK) - break; - } - } + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter)) + goto fail; + hlsl_block_add_block(&target_if->then_block, &tmp_dst); + } /* Native will not emit an error if max_iterations has been reached with an * explicit limit. 
It also will not insert a loop if there are iterations left * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ if (!loop->unroll_limit && i == max_iterations) { - if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); - return false; + goto fail; } + hlsl_block_cleanup(&loop_body); + copy_propagation_state_destroy(&state); + + list_move_before(&loop->node.entry, &draft.instrs); + hlsl_block_cleanup(&draft); list_remove(&loop->node.entry); hlsl_free_instr(&loop->node); return true; + +fail: + hlsl_block_cleanup(&loop_body); + copy_propagation_state_destroy(&state); + hlsl_block_cleanup(&draft); + + return false; } -/* - * loop_unrolling_find_unrollable_loop() is not the normal way to do things; - * normal passes simply iterate over the whole block and apply a transformation - * to every relevant instruction. However, loop unrolling can fail, and we want - * to leave the loop in its previous state in that case. That isn't a problem by - * itself, except that loop unrolling needs copy-prop in order to work properly, - * and copy-prop state at the time of the loop depends on the rest of the program - * up to that point. This means we need to clone the whole program, and at that - * point we have to search it again anyway to find the clone of the loop we were - * going to unroll. - * - * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop - * up until the loop instruction, clone just that loop, then use copyprop again - * with the saved state after unrolling. However, copyprop currently isn't built - * for that yet [notably, it still relies on indices]. 
Note also this still doesn't - * really let us use transform_ir() anyway [since we don't have a good way to say - * "copyprop from the beginning of the program up to the instruction we're - * currently processing" from the callback]; we'd have to use a dedicated - * recursive function instead. */ -static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_block **containing_block) +static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) { - struct hlsl_ir_node *instr; + struct hlsl_block *program = context; + struct hlsl_ir_loop *loop; - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + if (node->type != HLSL_IR_LOOP) + return true; + + loop = hlsl_ir_loop(node); + + if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL) + return true; + + if (!loop_unrolling_unroll_loop(ctx, program, loop)) + loop->unroll_type = HLSL_LOOP_FORCE_LOOP; + + return true; +} + +/* We could handle this at parse time. However, loop unrolling often needs to + * know the value of variables modified in the "iter" block. It is possible to + * detect that all exit paths of a loop body modify such variables in the same + * way, but difficult, and d3dcompiler does not attempt to do so. 
+ * In fact, d3dcompiler is capable of unrolling the following loop: + * for (int i = 0; i < 10; ++i) + * { + * if (some_uniform > 4) + * continue; + * } + * but cannot unroll the same loop with "++i" moved to each exit path: + * for (int i = 0; i < 10;) + * { + * if (some_uniform > 4) + * { + * ++i; + * continue; + * } + * ++i; + * } + */ +static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) +{ + struct hlsl_ir_loop *loop; + + if (node->type != HLSL_IR_LOOP) + return true; + + loop = hlsl_ir_loop(node); + + hlsl_block_add_block(&loop->body, &loop->iter); + return true; +} + +static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop) +{ + struct hlsl_ir_node *node; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) { - switch (instr->type) + switch (node->type) { case HLSL_IR_LOOP: { - struct hlsl_ir_loop *nested_loop; - struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - - if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) - return nested_loop; - - if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) - { - *containing_block = block; - return loop; - } + struct hlsl_ir_loop *loop = hlsl_ir_loop(node); + resolve_continues(ctx, &loop->body, loop); break; } case HLSL_IR_IF: { - struct hlsl_ir_loop *loop; - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - - if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) - return loop; - if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) - return loop; - + struct hlsl_ir_if *iff = hlsl_ir_if(node); + resolve_continues(ctx, &iff->then_block, last_loop); + resolve_continues(ctx, &iff->else_block, last_loop); break; } case HLSL_IR_SWITCH: { - struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch *s = hlsl_ir_switch(node); struct hlsl_ir_switch_case 
*c; - struct hlsl_ir_loop *loop; LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) { - if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) - return loop; + resolve_continues(ctx, &c->body, last_loop); + } + + break; + } + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + break; + + if (last_loop->type == HLSL_LOOP_FOR) + { + struct hlsl_block draft; + + if (!hlsl_clone_block(ctx, &draft, &last_loop->iter)) + return; + + list_move_before(&node->entry, &draft.instrs); + hlsl_block_cleanup(&draft); } + jump->type = HLSL_IR_JUMP_CONTINUE; break; } default: break; } } - - return NULL; } -static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) +static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) { - while (true) - { - struct hlsl_block clone, *containing_block; - struct hlsl_ir_loop *loop, *cloned_loop; - - if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) - return; - - if (!hlsl_clone_block(ctx, &clone, block)) - return; - - cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); - VKD3D_ASSERT(cloned_loop); + bool progress; - if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) - { - hlsl_block_cleanup(&clone); - loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; - continue; - } + /* These are required by copy propagation, which in turn is required for + * unrolling. 
*/ + do + { + progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL); + progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL); + } while (progress); + hlsl_transform_ir(ctx, split_matrix_copies, block, NULL); - hlsl_block_cleanup(block); - hlsl_block_init(block); - hlsl_block_add_block(block, &clone); - } + hlsl_transform_ir(ctx, unroll_loops, block, block); + resolve_continues(ctx, block, NULL); + hlsl_transform_ir(ctx, resolve_loops, block, NULL); } static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) @@ -9116,7 +11048,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); } - transform_unroll_loops(ctx, body); + loop_unrolling_execute(ctx, body); hlsl_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); @@ -9126,9 +11058,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); + if (hlsl_version_lt(ctx, 4, 0)) + hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) + + if (hlsl_version_ge(ctx, 4, 0)) hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); do @@ -9136,7 +11072,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, while (hlsl_transform_ir(ctx, dce, body, NULL)); hlsl_transform_ir(ctx, track_components_usage, body, NULL); - sort_synthetic_separated_samplers_first(ctx); + if (hlsl_version_lt(ctx, 4, 0)) + sort_synthetic_combined_samplers_first(ctx); + else + sort_synthetic_separated_samplers_first(ctx); if (profile->major_version < 4) { @@ -9241,14 +11180,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry mark_indexable_vars(ctx, entry_func); allocate_temp_registers(ctx, entry_func); 
allocate_const_registers(ctx, entry_func); + sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); } else { allocate_buffers(ctx); allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); } - allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx); @@ -9265,7 +11206,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry struct vsir_program program; int result; - sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); + sm1_generate_ctab(ctx, &ctab); + if (ctx->result) + return ctx->result; + + sm1_generate_vsir(ctx, entry_func, config_flags, &program); if (ctx->result) { vsir_program_cleanup(&program); @@ -9282,18 +11227,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry case VKD3D_SHADER_TARGET_DXBC_TPF: { uint32_t config_flags = vkd3d_shader_init_config_flags(); + struct vkd3d_shader_code rdef = {0}; struct vsir_program program; int result; + sm4_generate_rdef(ctx, &rdef); + if (ctx->result) + return ctx->result; + sm4_generate_vsir(ctx, entry_func, config_flags, &program); if (ctx->result) { vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&rdef); return ctx->result; } - result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); + result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context); vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&rdef); return result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 716adb15f08..cd7cd2fe6a3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -220,7 +220,9 @@ static bool 
fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, break; case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ + dst->u[k].u = u ? ~0u : 0u; + break; + default: vkd3d_unreachable(); } @@ -1544,6 +1546,149 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in return false; } +static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type) +{ + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_MUL: + return type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT; + + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + case HLSL_OP2_MAX: + case HLSL_OP2_MIN: + return true; + + default: + return false; + } +} + +static bool is_op_commutative(enum hlsl_ir_expr_op op) +{ + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + case HLSL_OP2_DOT: + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + case HLSL_OP2_MAX: + case HLSL_OP2_MIN: + case HLSL_OP2_MUL: + return true; + + default: + return false; + } +} + +bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *arg1 , *arg2; + struct hlsl_ir_expr *expr; + enum hlsl_base_type type; + enum hlsl_ir_expr_op op; + bool progress = false; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (instr->data_type->class > HLSL_CLASS_VECTOR) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + type = instr->data_type->e.numeric.type; + op = expr->op; + + if (!arg1 || !arg2) + return false; + + if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) + { + /* a OP x -> x OP a */ + struct hlsl_ir_node *tmp = arg1; + + arg1 = arg2; + arg2 = tmp; + progress = true; + } + + if (is_op_associative(op, type)) + { + struct hlsl_ir_expr *e1 = arg1->type == 
HLSL_IR_EXPR ? hlsl_ir_expr(arg1) : NULL; + struct hlsl_ir_expr *e2 = arg2->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg2) : NULL; + + if (e1 && e1->op == op && e1->operands[0].node->type != HLSL_IR_CONSTANT + && e1->operands[1].node->type == HLSL_IR_CONSTANT) + { + if (arg2->type == HLSL_IR_CONSTANT) + { + /* (x OP a) OP b -> x OP (a OP b) */ + struct hlsl_ir_node *ab; + + if (!(ab = hlsl_new_binary_expr(ctx, op, e1->operands[1].node, arg2))) + return false; + list_add_before(&instr->entry, &ab->entry); + + arg1 = e1->operands[0].node; + arg2 = ab; + progress = true; + } + else if (is_op_commutative(op)) + { + /* (x OP a) OP y -> (x OP y) OP a */ + struct hlsl_ir_node *xy; + + if (!(xy = hlsl_new_binary_expr(ctx, op, e1->operands[0].node, arg2))) + return false; + list_add_before(&instr->entry, &xy->entry); + + arg1 = xy; + arg2 = e1->operands[1].node; + progress = true; + } + } + + if (!progress && arg1->type != HLSL_IR_CONSTANT && e2 && e2->op == op + && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT) + { + /* x OP (y OP a) -> (x OP y) OP a */ + struct hlsl_ir_node *xy; + + if (!(xy = hlsl_new_binary_expr(ctx, op, arg1, e2->operands[0].node))) + return false; + list_add_before(&instr->entry, &xy->entry); + + arg1 = xy; + arg2 = e2->operands[1].node; + progress = true; + } + + } + + if (progress) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; + struct hlsl_ir_node *res; + + if (!(res = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) + return false; + list_add_before(&instr->entry, &res->entry); + hlsl_replace_node(instr, res); + } + + return progress; +} + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_constant_value value; @@ -1560,7 +1705,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst src = hlsl_ir_constant(swizzle->val.node); for (i = 0; i < swizzle->node.data_type->dimx; 
++i) - value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)]; if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) return false; diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index b0e89bededb..cdc0c18466f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -1,5 +1,6 @@ /* * Copyright 2023 Conor McCarthy for CodeWeavers + * Copyright 2023-2024 Elizabeth Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -201,6 +202,14 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 src->reg.u.immconst_u32[0] = value; } +static void vsir_src_param_init_io(struct vkd3d_shader_src_param *src, + enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count) +{ + vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = vsir_swizzle_from_writemask(e->mask); +} + void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) { vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); @@ -214,6 +223,14 @@ static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_ src->reg.idx[0].offset = idx; } +static void src_param_init_parameter_vec4(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) +{ + vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); + src->reg.idx[0].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; +} + static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) { vsir_src_param_init(src, VKD3DSPR_RESOURCE, 
VKD3D_DATA_UNUSED, 2); @@ -243,6 +260,14 @@ static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigne src->reg.idx[0].offset = idx; } +static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); + src->reg.idx[0].offset = idx; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_NO_SWIZZLE; +} + static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) { vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); @@ -278,6 +303,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader param->shift = 0; } +static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type, + const struct signature_element *e, unsigned int idx_count) +{ + vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = e->mask; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); @@ -290,6 +323,14 @@ static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigne dst->reg.idx[0].offset = idx; } +static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); + dst->reg.idx[0].offset = idx; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = VKD3DSP_WRITEMASK_ALL; +} + static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); @@ -709,6 +750,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, return VKD3D_OK; } +static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program 
*program, + struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +{ + switch (ins->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + break; + + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); + break; + + default: + vkd3d_shader_error(ctx->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Internal compiler error: invalid register type %#x for DCL_INPUT.", + ins->declaration.dst.reg.type); + return VKD3D_ERROR; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) +{ + switch (ins->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + break; + + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); + break; + + default: + vkd3d_shader_error(ctx->message_context, &ins->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Internal compiler error: invalid register type %#x for DCL_OUTPUT.", + ins->declaration.dst.reg.type); + return VKD3D_ERROR; + } + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, struct vsir_transformation_context *ctx) { 
@@ -743,11 +854,31 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr case VKD3DSIH_DCL_GLOBAL_FLAGS: case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: case VKD3DSIH_DCL_THREAD_GROUP: case VKD3DSIH_DCL_UAV_TYPED: vkd3d_shader_instruction_make_nop(ins); break; + case VKD3DSIH_DCL_INPUT: + vsir_program_lower_dcl_input(program, ins, ctx); + vkd3d_shader_instruction_make_nop(ins); + break; + + case VKD3DSIH_DCL_OUTPUT: + vsir_program_lower_dcl_output(program, ins, ctx); + vkd3d_shader_instruction_make_nop(ins); + break; + + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_SINCOS: if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) return ret; @@ -847,11 +978,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, return VKD3D_OK; } +static bool add_signature_element(struct shader_signature *signature, const char *semantic_name, + uint32_t semantic_index, uint32_t mask, uint32_t register_index, + enum vkd3d_shader_interpolation_mode interpolation_mode) +{ + struct signature_element *new_elements, *e; + + if (!(new_elements = vkd3d_realloc(signature->elements, + (signature->element_count + 1) * sizeof(*signature->elements)))) + return false; + signature->elements = new_elements; + e = &signature->elements[signature->element_count++]; + memset(e, 0, sizeof(*e)); + e->semantic_name = vkd3d_strdup(semantic_name); + e->semantic_index = semantic_index; + e->sysval_semantic = VKD3D_SHADER_SV_NONE; + e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + e->register_count = 1; + e->mask = mask; + e->used_mask = mask; + e->register_index = register_index; + e->target_location = register_index; + e->interpolation_mode = interpolation_mode; + return true; +} + static 
enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, struct vsir_transformation_context *ctx) { struct shader_signature *signature = &program->output_signature; - struct signature_element *new_elements, *e; + struct signature_element *e; if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) return VKD3D_OK; @@ -864,22 +1020,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr return VKD3D_OK; } - if (!(new_elements = vkd3d_realloc(signature->elements, - (signature->element_count + 1) * sizeof(*signature->elements)))) + if (!add_signature_element(signature, "COLOR", 0, VKD3DSP_WRITEMASK_ALL, SM1_COLOR_REGISTER_OFFSET, VKD3DSIM_NONE)) return VKD3D_ERROR_OUT_OF_MEMORY; - signature->elements = new_elements; - e = &signature->elements[signature->element_count++]; - memset(e, 0, sizeof(*e)); - e->semantic_name = vkd3d_strdup("COLOR"); - e->sysval_semantic = VKD3D_SHADER_SV_NONE; - e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; - e->register_count = 1; - e->mask = VKD3DSP_WRITEMASK_ALL; - e->used_mask = VKD3DSP_WRITEMASK_ALL; - e->register_index = SM1_COLOR_REGISTER_OFFSET; - e->target_location = SM1_COLOR_REGISTER_OFFSET; - e->interpolation_mode = VKD3DSIM_NONE; - return VKD3D_OK; } @@ -1034,6 +1176,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program e->target_location = map->input_register_index; + TRACE("Mapping signature index %u (mask %#x) to target location %u (mask %#x).\n", + i, e->mask, map->input_register_index, map->input_mask); + if ((input_mask & e->mask) == input_mask) { ++subset_varying_count; @@ -1054,6 +1199,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program } else { + TRACE("Marking signature index %u (mask %#x) as unused.\n", i, e->mask); + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; } @@ -1213,12 +1360,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal 
vkd3d_shader_instruction_make_nop(ins); return; } - else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( - &ins->declaration.dst.reg)) - { - vkd3d_shader_instruction_make_nop(ins); - return; - } if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins)) return; @@ -1369,25 +1510,15 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param } } -static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, - enum vkd3d_shader_register_type reg_type, unsigned int idx_count) -{ - param->write_mask = e->mask; - param->modifiers = 0; - param->shift = 0; - vsir_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); -} - static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, const struct vkd3d_shader_location *location) { struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_dst_param *param; const struct signature_element *e; - unsigned int i, count; + unsigned int i, count = 2; - for (i = 0, count = 1; i < s->element_count; ++i) + for (i = 0; i < s->element_count; ++i) count += !!s->elements[i].used_mask; if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) @@ -1399,7 +1530,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p ins = &normaliser->instructions.elements[dst]; vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); - ins->flags = 1; + ++ins; for (i = 0; i < s->element_count; ++i) @@ -1408,26 +1539,35 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p if (!e->used_mask) continue; - if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) - { - vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV); - param = 
&ins->declaration.register_semantic.reg; - ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); - } - else + vsir_instruction_init(ins, location, VKD3DSIH_MOV); + ins->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); + ins->dst_count = 1; + ins->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); + ins->src_count = 1; + + if (!ins->dst || ! ins->src) { - vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT); - param = &ins->declaration.dst; + WARN("Failed to allocate dst/src param.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; } - shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); - param->reg.idx[0].offset = input_control_point_count; - param->reg.idx[1].offset = e->register_index; - param->write_mask = e->mask; + vsir_dst_param_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = 0; + ins->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + ins->dst[0].reg.idx[1].offset = e->register_index; + + vsir_src_param_init_io(&ins->src[0], VKD3DSPR_INPUT, e, 2); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = 0; + ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; + ins->src[0].reg.idx[1].offset = e->register_index; ++ins; } + vsir_instruction_init(ins, location, VKD3DSIH_RET); + return VKD3D_OK; } @@ -1442,7 +1582,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j; - VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4); if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) { @@ -1545,11 +1685,6 @@ static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *n return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; } 
-static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) -{ - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; -} - static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) { @@ -1920,41 +2055,26 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh { VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); - /* For a relative-addressed register index, move the id up a slot to separate it from the address, - * because rel_addr can be replaced with a constant offset in some cases. */ - if (reg->idx[id_idx].rel_addr) - { - reg->idx[id_idx + 1].rel_addr = NULL; - reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; - reg->idx[id_idx].offset -= register_index; - if (id_idx) - { - /* idx[id_idx] now contains the array index, which must be moved below the control point id. */ - struct vkd3d_shader_register_index tmp = reg->idx[id_idx]; - reg->idx[id_idx] = reg->idx[id_idx - 1]; - reg->idx[id_idx - 1] = tmp; - } - ++id_idx; - } - /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where - * tessellation level registers are merged into an array because they're an array in SPIR-V. */ - else - { - ++id_idx; - memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0])); - reg->idx[0].rel_addr = NULL; - reg->idx[0].offset = reg->idx[id_idx].offset - register_index; - } + /* Make room for the array index at the front of the array. */ + ++id_idx; + memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0])); + + /* The array index inherits the register relative address, but is offsetted + * by the signature element register index. */ + reg->idx[0].rel_addr = reg->idx[id_idx].rel_addr; + reg->idx[0].offset = reg->idx[id_idx].offset - register_index; + reg->idx[id_idx].rel_addr = NULL; + + /* The signature index offset will be fixed in the caller. 
*/ return id_idx; } -static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, +static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, struct io_normaliser *normaliser) { unsigned int id_idx, reg_idx, write_mask, element_idx; struct vkd3d_shader_register *reg = &dst_param->reg; - struct vkd3d_shader_dst_param **dcl_params; const struct shader_signature *signature; const struct signature_element *e; @@ -1970,26 +2090,22 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par /* Convert patch constant outputs to the patch constant register type to avoid the need * to convert compiler symbols when accessed as inputs in a later stage. */ reg->type = VKD3DSPR_PATCHCONST; - dcl_params = normaliser->pc_dcl_params; } else { signature = normaliser->output_signature; - dcl_params = normaliser->output_dcl_params; } break; case VKD3DSPR_PATCHCONST: reg_idx = reg->idx[reg->idx_count - 1].offset; signature = normaliser->patch_constant_signature; - dcl_params = normaliser->pc_dcl_params; break; case VKD3DSPR_COLOROUT: reg_idx = reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; break; case VKD3DSPR_INCONTROLPOINT: @@ -1997,14 +2113,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par reg_idx = reg->idx[reg->idx_count - 1].offset; signature = normaliser->input_signature; reg->type = VKD3DSPR_INPUT; - dcl_params = normaliser->input_dcl_params; break; case VKD3DSPR_ATTROUT: reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; break; case VKD3DSPR_RASTOUT: @@ -2014,7 +2128,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; signature = 
normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; /* Fog and point size are scalar, but fxc/d3dcompiler emits a full * write mask when writing to them. */ if (reg->idx[0].offset > 0) @@ -2030,54 +2143,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par vkd3d_unreachable(); e = &signature->elements[element_idx]; - if (is_io_dcl) - { - /* Validated in the TPF reader. */ - VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); - - if (dcl_params[element_idx]) - { - /* Merge split declarations into a single one. */ - dcl_params[element_idx]->write_mask |= dst_param->write_mask; - /* Turn this into a nop. */ - return false; - } - else - { - dcl_params[element_idx] = dst_param; - } - } - - if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) - { - if (is_io_dcl) - { - /* Emit an array size for the control points for consistency with inputs. */ - reg->idx[0].offset = normaliser->output_control_point_count; - } - else - { - /* The control point id param. */ - VKD3D_ASSERT(reg->idx[0].rel_addr); - } - id_idx = 1; - } - if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) - { - if (is_io_dcl) - { - /* For control point I/O, idx 0 contains the control point count. - * Ensure it is moved up to the next slot. 
*/ - reg->idx[id_idx].offset = reg->idx[0].offset; - reg->idx[0].offset = e->register_count; - ++id_idx; - } - else - { - id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); - } - } + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); /* Replace the register index with the signature element index */ reg->idx[id_idx].offset = element_idx; @@ -2129,6 +2196,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par case VKD3DSPR_OUTCONTROLPOINT: reg->type = VKD3DSPR_OUTPUT; + if (io_normaliser_is_in_fork_or_join_phase(normaliser)) + normaliser->use_vocp = true; /* fall through */ case VKD3DSPR_OUTPUT: reg_idx = reg->idx[reg->idx_count - 1].offset; @@ -2169,40 +2238,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, struct io_normaliser *normaliser) { - struct vkd3d_shader_register *reg; unsigned int i; switch (ins->opcode) { - case VKD3DSIH_DCL_INPUT: - if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) - { - reg = &ins->declaration.dst.reg; - - if (reg->type == VKD3DSPR_OUTCONTROLPOINT) - normaliser->use_vocp = true; - - /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their - * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ - if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) - vkd3d_shader_instruction_make_nop(ins); - else if (reg->type == VKD3DSPR_INCONTROLPOINT) - reg->type = VKD3DSPR_INPUT; - } - /* fall through */ - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_OUTPUT: - if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser)) - vkd3d_shader_instruction_make_nop(ins); - break; - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: - if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser)) - vkd3d_shader_instruction_make_nop(ins); - break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -2215,7 +2254,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi if (vsir_instruction_is_dcl(ins)) break; for (i = 0; i < ins->dst_count; ++i) - shader_dst_param_io_normalise(&ins->dst[i], false, normaliser); + shader_dst_param_io_normalise(&ins->dst[i], normaliser); for (i = 0; i < ins->src_count; ++i) shader_src_param_io_normalise(&ins->src[i], normaliser); break; @@ -2275,7 +2314,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program program->instructions = normaliser.instructions; program->use_vocp = normaliser.use_vocp; - program->normalisation_level = VSIR_FULLY_NORMALISED_IO; + program->normalisation_level = VSIR_NORMALISED_SM6; return VKD3D_OK; } @@ -6634,149 +6673,747 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr return VKD3D_OK; } -struct validation_context +static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - struct vkd3d_shader_message_context *message_context; - const struct vsir_program *program; - size_t instruction_idx; - struct vkd3d_shader_location 
null_location; - bool invalid_instruction_idx; - enum vkd3d_result status; - bool dcl_temps_found; - enum vkd3d_shader_opcode phase; - bool inside_block; + struct shader_signature *signature = &program->input_signature; + uint32_t register_idx = 0; - struct validation_context_temp_data - { - enum vsir_dimension dimension; - size_t first_seen; - } *temps; + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; - struct validation_context_ssa_data - { - enum vsir_dimension dimension; - enum vkd3d_data_type data_type; - size_t first_seen; - uint32_t write_mask; - uint32_t read_mask; - size_t first_assigned; - } *ssas; + if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) + return VKD3D_OK; - enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -}; + /* We could check the value and skip this if NONE, but chances are if a + * user specifies the fog fragment mode as a parameter, they'll want to + * enable it dynamically. Always specifying it (and hence always outputting + * it from the VS) avoids an extra VS variant. */ -static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, - enum vkd3d_shader_error error, const char *format, ...) 
-{ - struct vkd3d_string_buffer buf; - va_list args; + if (vsir_signature_find_element_by_name(signature, "FOG", 0)) + return VKD3D_OK; - vkd3d_string_buffer_init(&buf); + for (unsigned int i = 0; i < signature->element_count; ++i) + register_idx = max(register_idx, signature->elements[i].register_index + 1); - va_start(args, format); - vkd3d_string_buffer_vprintf(&buf, format, args); - va_end(args); + if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) + return VKD3D_ERROR_OUT_OF_MEMORY; + return VKD3D_OK; +} - if (ctx->invalid_instruction_idx) - { - vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); - WARN("VSIR validation error: %s\n", buf.buffer); - } - else +static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, + uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, + size_t *ret_pos, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_location loc = ret->location; + uint32_t ssa_factor = program->ssa_count++; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + uint32_t ssa_temp, ssa_temp2; + + switch (mode) { - const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; - vkd3d_shader_error(ctx->message_context, &ins->location, error, - "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); - WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); - } + case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: + /* We generate the following code: + * + * add sr0, FOG_END, -vFOG.x + * mul_sat srFACTOR, sr0, FOG_SCALE + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return 
VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 4; + + ssa_temp = program->ssa_count++; + + ins = &program->instructions.elements[pos]; + + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_ADD, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VKD3D_DATA_FLOAT); + vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + ins->src[1].modifiers = VKD3DSPSM_NEG; + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + break; - vkd3d_string_buffer_cleanup(&buf); + case VKD3D_SHADER_FOG_FRAGMENT_EXP: + /* We generate the following code: + * + * mul sr0, FOG_SCALE, vFOG.x + * exp_sat srFACTOR, -sr0 + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) + return VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 4; - if (!ctx->status) - ctx->status = VKD3D_ERROR_INVALID_SHADER; -} + ssa_temp = program->ssa_count++; -static void vsir_validate_register_without_indices(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) -{ - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, - "Invalid index count %u for a register of type %#x.", - reg->idx_count, reg->type); -} + ins = &program->instructions.elements[pos]; -static void vsir_validate_io_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) -{ - const struct shader_signature *signature; - bool has_control_point = false; + vsir_instruction_init_with_params(program, ins, 
&loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - switch (reg->type) - { - case VKD3DSPR_INPUT: - signature = &ctx->program->input_signature; + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp); + ins->src[0].modifiers = VKD3DSPSM_NEG; + break; - switch (ctx->program->shader_version.type) - { - case VKD3D_SHADER_TYPE_GEOMETRY: - case VKD3D_SHADER_TYPE_HULL: - case VKD3D_SHADER_TYPE_DOMAIN: - has_control_point = true; - break; - - default: - break; - } + case VKD3D_SHADER_FOG_FRAGMENT_EXP2: + /* We generate the following code: + * + * mul sr0, FOG_SCALE, vFOG.x + * mul sr1, sr0, sr0 + * exp_sat srFACTOR, -sr1 + */ + if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) + return VKD3D_ERROR_OUT_OF_MEMORY; + *ret_pos = pos + 5; + + ssa_temp = program->ssa_count++; + ssa_temp2 = program->ssa_count++; + + ins = &program->instructions.elements[pos]; + + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); + dst_param_init_ssa_float(&ins->dst[0], ssa_temp); + src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); + vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); + ins->src[1].reg.idx[0].offset = fog_signature_idx; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); + 
dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); + src_param_init_ssa_float(&ins->src[0], ssa_temp); + src_param_init_ssa_float(&ins->src[1], ssa_temp); + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); + dst_param_init_ssa_float(&ins->dst[0], ssa_factor); + ins->dst[0].modifiers = VKD3DSPDM_SATURATE; + src_param_init_ssa_float(&ins->src[0], ssa_temp2); + ins->src[0].modifiers = VKD3DSPSM_NEG; break; - case VKD3DSPR_OUTPUT: - switch (ctx->program->shader_version.type) + default: + vkd3d_unreachable(); + } + + /* We generate the following code: + * + * add sr0, FRAG_COLOUR, -FOG_COLOUR + * mad oC0, sr0, srFACTOR, FOG_COLOUR + */ + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_ADD, 1, 2); + dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++); + src_param_init_temp_float4(&ins->src[0], colour_temp); + src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); + ins->src[1].modifiers = VKD3DSPSM_NEG; + + vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MAD, 1, 3); + dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, colour_signature_idx, + program->output_signature.elements[colour_signature_idx].mask); + src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1); + src_param_init_ssa_float(&ins->src[1], ssa_factor); + src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vkd3d_shader_message_context *message_context = ctx->message_context; + uint32_t colour_signature_idx, fog_signature_idx, colour_temp; + const struct vkd3d_shader_parameter1 *mode_parameter = NULL; + static const struct vkd3d_shader_location no_loc; + const struct signature_element *fog_element; + enum vkd3d_shader_fog_fragment_mode mode; + struct 
vkd3d_shader_instruction *ins; + size_t new_pos; + int ret; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; + + if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)) + return VKD3D_OK; + + if (!(mode_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE))) + return VKD3D_OK; + + if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + mode = mode_parameter->u.immediate_constant.u.u32; + + if (mode == VKD3D_SHADER_FOG_FRAGMENT_NONE) + return VKD3D_OK; + + /* Should have been added by vsir_program_add_fog_input(). */ + if (!(fog_element = vsir_signature_find_element_by_name(&program->input_signature, "FOG", 0))) + { + ERR("Fog input not found.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + fog_signature_idx = fog_element - program->input_signature.elements; + + /* We're going to be reading from the output, so we need to go + * through the whole shader and convert it to a temp. 
*/ + colour_temp = program->temp_count++; + + for (size_t i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; + + if (vsir_instruction_is_dcl(ins)) + continue; + + if (ins->opcode == VKD3DSIH_RET) + { + if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx, + colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) + return ret; + i = new_pos; + continue; + } + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. */ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) { - case VKD3D_SHADER_TYPE_HULL: - if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE - || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) - { - signature = &ctx->program->output_signature; - has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - } - else - { - signature = &ctx->program->patch_constant_signature; - } - break; + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = colour_temp; + } + } + } - default: - signature = &ctx->program->output_signature; - break; + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct shader_signature *signature = &program->output_signature; + const struct vkd3d_shader_parameter1 *source_parameter; + uint32_t register_idx = 0; + + if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; + + if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) + return VKD3D_OK; + + if (source_parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + enum vkd3d_shader_fog_source source = source_parameter->u.immediate_constant.u.u32; + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG) + return VKD3D_OK; + + if (source == 
VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W + && !vsir_signature_find_element_by_name(signature, "COLOR", 1)) + return VKD3D_OK; + } + + if (vsir_signature_find_element_by_name(signature, "FOG", 0)) + return VKD3D_OK; + + for (unsigned int i = 0; i < signature->element_count; ++i) + register_idx = max(register_idx, signature->elements[i].register_index + 1); + + if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) + return VKD3D_ERROR_OUT_OF_MEMORY; + return VKD3D_OK; +} + +static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp, + uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos) +{ + const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + + if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins = &program->instructions.elements[pos]; + + /* Write the fog output. */ + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); + src_param_init_temp_float4(&ins->src[0], temp); + if (source == VKD3D_SHADER_FOG_SOURCE_Z) + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); + else /* Position or specular W. */ + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); + ++ins; + + /* Write the position or specular output. 
*/ + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), + source_signature_idx, e->mask); + src_param_init_temp_float4(&ins->src[0], temp); + ++ins; + + *ret_pos = pos + 2; + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vkd3d_shader_message_context *message_context = ctx->message_context; + const struct vkd3d_shader_parameter1 *source_parameter = NULL; + uint32_t fog_signature_idx, source_signature_idx, temp; + static const struct vkd3d_shader_location no_loc; + enum vkd3d_shader_fog_source source; + const struct signature_element *e; + + if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; + + if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) + return VKD3D_OK; + + if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported fog source parameter type %#x.", source_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid fog source parameter data type %#x.", source_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + source = source_parameter->u.immediate_constant.u.u32; + + TRACE("Fog source %#x.\n", source); + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG) + return VKD3D_OK; + + if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W) + { + if (program->has_fog || !(e = vsir_signature_find_element_by_name(&program->output_signature, "COLOR", 1))) + return VKD3D_OK; + source_signature_idx = e - 
program->output_signature.elements; + } + else + { + if (!vsir_signature_find_sysval(&program->output_signature, + VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx)) + { + vkd3d_shader_error(ctx->message_context, &no_loc, + VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); + return VKD3D_ERROR_INVALID_SHADER; + } + } + + if (!(e = vsir_signature_find_element_by_name(&program->output_signature, "FOG", 0))) + { + ERR("Fog output not found.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + fog_signature_idx = e - program->output_signature.elements; + + temp = program->temp_count++; + + /* Insert a fog write before each ret, and convert either specular or + * position output to a temp. */ + for (size_t i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + if (vsir_instruction_is_dcl(ins)) + continue; + + if (ins->opcode == VKD3DSIH_RET) + { + size_t new_pos; + int ret; + + if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp, + fog_signature_idx, source_signature_idx, &new_pos)) < 0) + return ret; + i = new_pos; + continue; + } + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. 
*/ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == source_signature_idx) + { + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = temp; } - break; + } + } - case VKD3DSPR_INCONTROLPOINT: - signature = &ctx->program->input_signature; - has_control_point = true; - break; + program->has_fog = true; - case VKD3DSPR_OUTCONTROLPOINT: - signature = &ctx->program->output_signature; - has_control_point = true; - break; + return VKD3D_OK; +} - case VKD3DSPR_PATCHCONST: - signature = &ctx->program->patch_constant_signature; - break; +struct validation_context +{ + struct vkd3d_shader_message_context *message_context; + const struct vsir_program *program; + size_t instruction_idx; + struct vkd3d_shader_location null_location; + bool invalid_instruction_idx; + enum vkd3d_result status; + bool dcl_temps_found; + enum vkd3d_shader_opcode phase; + bool inside_block; + + struct validation_context_temp_data + { + enum vsir_dimension dimension; + size_t first_seen; + } *temps; + + struct validation_context_ssa_data + { + enum vsir_dimension dimension; + enum vkd3d_data_type data_type; + size_t first_seen; + uint32_t write_mask; + uint32_t read_mask; + size_t first_assigned; + } *ssas; + + enum vkd3d_shader_opcode *blocks; + size_t depth; + size_t blocks_capacity; + + unsigned int outer_tess_idxs[4]; + unsigned int inner_tess_idxs[2]; + + struct validation_context_signature_data + { + struct validation_context_signature_stream_data + { + struct validation_context_signature_register_data + { + struct validation_context_signature_component_data + { + const struct signature_element *element; + } components[VKD3D_VEC4_SIZE]; + } registers[MAX_REG_OUTPUT]; + } streams[VKD3D_MAX_STREAM_COUNT]; + } input_signature_data, output_signature_data, patch_constant_signature_data; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, + enum vkd3d_shader_error error, const char *format, ...) 
+{ + struct vkd3d_string_buffer buf; + va_list args; + + vkd3d_string_buffer_init(&buf); + + va_start(args, format); + vkd3d_string_buffer_vprintf(&buf, format, args); + va_end(args); + + if (ctx->invalid_instruction_idx) + { + vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); + WARN("VSIR validation error: %s\n", buf.buffer); + } + else + { + const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; + vkd3d_shader_error(ctx->message_context, &ins->location, error, + "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); + WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + } + + vkd3d_string_buffer_cleanup(&buf); + + if (!ctx->status) + ctx->status = VKD3D_ERROR_INVALID_SHADER; +} + +static void vsir_validate_register_without_indices(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) +{ + if (reg->idx_count != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a register of type %#x.", + reg->idx_count, reg->type); +} + +enum vsir_signature_type +{ + SIGNATURE_TYPE_INPUT, + SIGNATURE_TYPE_OUTPUT, + SIGNATURE_TYPE_PATCH_CONSTANT, +}; + +enum vsir_io_reg_type +{ + REG_V, + REG_O, + REG_VPC, + REG_VICP, + REG_VOCP, + REG_COUNT, +}; + +enum vsir_phase +{ + PHASE_NONE, + PHASE_CONTROL_POINT, + PHASE_FORK, + PHASE_JOIN, + PHASE_COUNT, +}; + +struct vsir_io_register_data +{ + unsigned int flags; + enum vsir_signature_type signature_type; + const struct shader_signature *signature; + unsigned int control_point_count; +}; + +enum +{ + INPUT_BIT = (1u << 0), + OUTPUT_BIT = (1u << 1), + CONTROL_POINT_BIT = (1u << 2), +}; + +static const struct vsir_io_register_data vsir_sm4_io_register_data + [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] = +{ + [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] = + { + [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, 
SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] = + { + [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] = + { + [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] = + { + [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] = + { + [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, + /* According to MSDN, vpc is not allowed in fork phases. However we + * don't really distinguish between fork and join phases, so we + * allow it. */ + [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + }, + [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] = + { + [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, + [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + }, + [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] = + { + [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, +}; + +static const struct vsir_io_register_data vsir_sm6_io_register_data + [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] = +{ + [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] = + { + [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] = + { + [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] = + { 
+ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] = + { + [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {OUTPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, + [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] = + { + [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, + [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + }, + [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] = + { + [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, + [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + }, + [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] = + { + [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, + [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, + [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, + }, +}; + +static bool vsir_get_io_register_data(struct validation_context *ctx, + enum vkd3d_shader_register_type register_type, struct vsir_io_register_data *data) +{ + const struct vsir_io_register_data (*signature_register_data) + [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT]; + enum vsir_io_reg_type io_reg_type; + enum vsir_phase phase; + + if (ctx->program->shader_version.type >= ARRAY_SIZE(*signature_register_data)) + return false; + + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) + signature_register_data = &vsir_sm6_io_register_data; + else + signature_register_data = &vsir_sm4_io_register_data; + + switch (register_type) + { + case VKD3DSPR_INPUT: io_reg_type = REG_V; break; + case VKD3DSPR_OUTPUT: io_reg_type = REG_O; break; + case VKD3DSPR_INCONTROLPOINT: io_reg_type = REG_VICP; break; + case VKD3DSPR_OUTCONTROLPOINT: io_reg_type = REG_VOCP; break; + case VKD3DSPR_PATCHCONST: io_reg_type = 
REG_VPC; break; + + default: + return false; + } + + switch (ctx->phase) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: phase = PHASE_CONTROL_POINT; break; + case VKD3DSIH_HS_FORK_PHASE: phase = PHASE_FORK; break; + case VKD3DSIH_HS_JOIN_PHASE: phase = PHASE_JOIN; break; + case VKD3DSIH_INVALID: phase = PHASE_NONE; break; + + default: + vkd3d_unreachable(); + } + + *data = (*signature_register_data)[ctx->program->shader_version.type][phase][io_reg_type]; + + if (!(data->flags & (INPUT_BIT | OUTPUT_BIT))) + return false; + + /* VSIR_NORMALISED_HULL_CONTROL_POINT_IO differs from VSIR_NORMALISED_SM4 + * for just a single flag. So we don't keep a whole copy of it, but just + * patch SM4 when needed. */ + if (ctx->program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO + && ctx->program->shader_version.type == VKD3D_SHADER_TYPE_HULL + && phase == PHASE_CONTROL_POINT && io_reg_type == REG_O) + { + VKD3D_ASSERT(!(data->flags & CONTROL_POINT_BIT)); + data->flags |= CONTROL_POINT_BIT; + } + + switch (data->signature_type) + { + case SIGNATURE_TYPE_INPUT: + data->signature = &ctx->program->input_signature; + data->control_point_count = ctx->program->input_control_point_count; + return true; + + case SIGNATURE_TYPE_OUTPUT: + data->signature = &ctx->program->output_signature; + data->control_point_count = ctx->program->output_control_point_count; + return true; + + case SIGNATURE_TYPE_PATCH_CONSTANT: + data->signature = &ctx->program->patch_constant_signature; + return true; default: vkd3d_unreachable(); } +} + +static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +{ + unsigned int control_point_index, control_point_count; + const struct shader_signature *signature; + struct vsir_io_register_data io_reg_data; + bool has_control_point; + + if (!vsir_get_io_register_data(ctx, reg->type, &io_reg_data)) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid usage of register type 
%#x.", reg->type); + return; + } + + signature = io_reg_data.signature; + has_control_point = io_reg_data.flags & CONTROL_POINT_BIT; + control_point_count = io_reg_data.control_point_count; - if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) + if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6) { /* Indices are [register] or [control point, register]. Both are * allowed to have a relative address. */ unsigned int expected_idx_count = 1 + !!has_control_point; + control_point_index = 0; + if (reg->idx_count != expected_idx_count) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, @@ -6795,7 +7432,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, /* If the signature element is not an array, indices are * [signature] or [control point, signature]. If the signature * element is an array, indices are [array, signature] or - * [control point, array, signature]. In any case `signature' is + * [array, control point, signature]. In any case `signature' is * not allowed to have a relative address, while the others are. 
*/ if (reg->idx_count < 1) @@ -6829,6 +7466,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, is_array = true; expected_idx_count = 1 + !!has_control_point + !!is_array; + control_point_index = !!is_array; if (reg->idx_count != expected_idx_count) { @@ -6837,7 +7475,18 @@ static void vsir_validate_io_register(struct validation_context *ctx, reg->idx_count, reg->type); return; } + + if (is_array && !reg->idx[0].rel_addr && reg->idx[0].offset >= element->register_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Array index %u exceeds the signature element register count %u in a register of type %#x.", + reg->idx[0].offset, element->register_count, reg->type); } + + if (has_control_point && !reg->idx[control_point_index].rel_addr + && reg->idx[control_point_index].offset >= control_point_count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Control point index %u exceeds the control point count %u in a register of type %#x.", + reg->idx[control_point_index].offset, control_point_count, reg->type); } static void vsir_validate_temp_register(struct validation_context *ctx, @@ -7143,8 +7792,26 @@ static void vsir_validate_register(struct validation_context *ctx, for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) { const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; - if (reg->idx[i].rel_addr) + if (param) + { vsir_validate_src_param(ctx, param); + + switch (param->reg.type) + { + case VKD3DSPR_TEMP: + case VKD3DSPR_SSA: + case VKD3DSPR_ADDR: + case VKD3DSPR_LOOP: + case VKD3DSPR_OUTPOINTID: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x for a relative address parameter.", + param->reg.type); + break; + } + } } switch (reg->type) @@ -7185,6 +7852,10 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; + case VKD3DSPR_PRIMID: + 
vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_NULL: vsir_validate_register_without_indices(ctx, reg); break; @@ -7201,6 +7872,18 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_uav_register(ctx, reg); break; + case VKD3DSPR_OUTPOINTID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_FORKINSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_JOININSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_INCONTROLPOINT: vsir_validate_io_register(ctx, reg); break; @@ -7213,6 +7896,38 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_io_register(ctx, reg); break; + case VKD3DSPR_TESSCOORD: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_THREADID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_THREADGROUPID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_LOCALTHREADID: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_LOCALTHREADINDEX: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_COVERAGE: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_SAMPLEMASK: + vsir_validate_register_without_indices(ctx, reg); + break; + + case VKD3DSPR_GSINSTID: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_DEPTHOUTGE: vsir_validate_register_without_indices(ctx, reg); break; @@ -7221,15 +7936,37 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; + case VKD3DSPR_OUTSTENCILREF: + vsir_validate_register_without_indices(ctx, reg); + break; + case VKD3DSPR_SSA: vsir_validate_ssa_register(ctx, reg); break; + case VKD3DSPR_WAVELANECOUNT: + vsir_validate_register_without_indices(ctx, reg); + break; + + case 
VKD3DSPR_WAVELANEINDEX: + vsir_validate_register_without_indices(ctx, reg); + break; + default: break; } } +static void vsir_validate_io_dst_param(struct validation_context *ctx, + const struct vkd3d_shader_dst_param *dst) +{ + struct vsir_io_register_data io_reg_data; + + if (!vsir_get_io_register_data(ctx, dst->reg.type, &io_reg_data) || !(io_reg_data.flags & OUTPUT_BIT)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x used as destination parameter.", dst->reg.type); +} + static void vsir_validate_dst_param(struct validation_context *ctx, const struct vkd3d_shader_dst_param *dst) { @@ -7304,15 +8041,28 @@ static void vsir_validate_dst_param(struct validation_context *ctx, case VKD3DSPR_IMMCONST64: case VKD3DSPR_SAMPLER: case VKD3DSPR_RESOURCE: - case VKD3DSPR_INPUT: validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid %#x register used as destination parameter.", dst->reg.type); break; + case VKD3DSPR_INPUT: + vsir_validate_io_dst_param(ctx, dst); + break; + + case VKD3DSPR_OUTPUT: + vsir_validate_io_dst_param(ctx, dst); + break; + + case VKD3DSPR_INCONTROLPOINT: + vsir_validate_io_dst_param(ctx, dst); + break; + + case VKD3DSPR_OUTCONTROLPOINT: + vsir_validate_io_dst_param(ctx, dst); + break; + case VKD3DSPR_PATCHCONST: - if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); + vsir_validate_io_dst_param(ctx, dst); break; default: @@ -7420,13 +8170,6 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, return true; } -enum vsir_signature_type -{ - SIGNATURE_TYPE_INPUT, - SIGNATURE_TYPE_OUTPUT, - SIGNATURE_TYPE_PATCH_CONSTANT, -}; - static const char * const signature_type_names[] = { [SIGNATURE_TYPE_INPUT] = "input", @@ -7466,17 +8209,32 @@ sysval_validation_data[] = }; static void 
vsir_validate_signature_element(struct validation_context *ctx, - const struct shader_signature *signature, enum vsir_signature_type signature_type, - unsigned int idx) + const struct shader_signature *signature, struct validation_context_signature_data *signature_data, + enum vsir_signature_type signature_type, unsigned int idx) { + enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + bool integer_type = false, is_outer = false, is_gs_output, require_index = true; const char *signature_type_name = signature_type_names[signature_type]; const struct signature_element *element = &signature->elements[idx]; - bool integer_type = false; + unsigned int semantic_index_max = 0, i, j; if (element->register_count == 0) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); + if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT + || MAX_REG_OUTPUT - element->register_index < element->register_count)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid register index %u and count %u.", + idx, signature_type_name, element->register_index, element->register_count); + + is_gs_output = ctx->program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY + && signature_type == SIGNATURE_TYPE_OUTPUT; + if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || (element->stream_index != 0 && !is_gs_output)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid stream index %u.", + idx, signature_type_name, element->stream_index); + if (element->mask == 0 || (element->mask & ~0xf)) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); @@ -7486,33 +8244,27 @@ static void vsir_validate_signature_element(struct validation_context 
*ctx, "element %u of %s signature: Non-contiguous mask %#x.", idx, signature_type_name, element->mask); - /* Here we'd likely want to validate that the usage mask is a subset of the - * signature mask. Unfortunately the D3DBC parser sometimes violates this. - * For example I've seen a shader like this: - * ps_3_0 - * [...] - * dcl_texcoord0 v0 - * [...] - * texld r2.xyzw, v0.xyzw, s1.xyzw - * [...] - * - * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to - * compute the signature mask, but the texld instruction apparently uses all - * the components. Of course the last two components are ignored, but - * formally they seem to be used. So we end up with a signature element with - * mask .xy and usage mask .xyzw. - * - * The correct fix would probably be to make the D3DBC parser aware of which - * components are really used for each instruction, but that would take some - * time. */ - if (element->used_mask & ~0xf) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid usage mask %#x.", - idx, signature_type_name, element->used_mask); + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM4) + { + if ((element->used_mask & element->mask) != element->used_mask) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid usage mask %#x with mask %#x.", + idx, signature_type_name, element->used_mask, element->mask); + } + else + { + if (element->used_mask & ~0xf) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid usage mask %#x.", + idx, signature_type_name, element->used_mask); + } switch (element->sysval_semantic) { case VKD3D_SHADER_SV_NONE: + case VKD3D_SHADER_SV_TARGET: + break; + case VKD3D_SHADER_SV_POSITION: case VKD3D_SHADER_SV_CLIP_DISTANCE: case VKD3D_SHADER_SV_CULL_DISTANCE: @@ -7523,18 +8275,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, case 
VKD3D_SHADER_SV_INSTANCE_ID: case VKD3D_SHADER_SV_IS_FRONT_FACE: case VKD3D_SHADER_SV_SAMPLE_INDEX: + case VKD3D_SHADER_SV_DEPTH: + case VKD3D_SHADER_SV_COVERAGE: + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: + case VKD3D_SHADER_SV_STENCIL_REF: + require_index = false; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + semantic_index_max = 4; + is_outer = true; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; + semantic_index_max = 2; + is_outer = false; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + semantic_index_max = 3; + is_outer = true; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; + semantic_index_max = 1; + is_outer = false; + break; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: - case VKD3D_SHADER_SV_TARGET: - case VKD3D_SHADER_SV_DEPTH: - case VKD3D_SHADER_SV_COVERAGE: - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: - case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: - case VKD3D_SHADER_SV_STENCIL_REF: + expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; + semantic_index_max = 2; + is_outer = true; break; default: @@ -7544,6 +8321,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, break; } + if (require_index && element->register_index == UINT_MAX) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: System value semantic %#x requires a register index.", + idx, signature_type_name, element->sysval_semantic); + + if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + { + if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: System value semantic %#x is only valid 
" + "in the patch constant signature.", + idx, signature_type_name, element->sysval_semantic); + + if (ctx->program->tess_domain != expected_tess_domain) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", + idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); + + if (element->semantic_index >= semantic_index_max) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", + idx, signature_type_name, element->semantic_index, element->sysval_semantic); + } + else + { + unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; + + if (*idx_pos != ~0u) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", + idx, signature_type_name, element->semantic_index, element->sysval_semantic); + else + *idx_pos = idx; + } + } + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) { const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; @@ -7622,6 +8436,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", idx, signature_type_name, element->interpolation_mode); + + if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || !require_index) + return; + + for (i = element->register_index; i < MAX_REG_OUTPUT + && i - element->register_index < element->register_count; ++i) + { + struct validation_context_signature_stream_data *stream_data = &signature_data->streams[element->stream_index]; + struct validation_context_signature_register_data *register_data = 
&stream_data->registers[i]; + + for (j = 0; j < VKD3D_VEC4_SIZE; ++j) + { + struct validation_context_signature_component_data *component_data = ®ister_data->components[j]; + + if (!(element->mask & (1u << j))) + continue; + + if (!component_data->element) + component_data->element = element; + else + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Conflict with element %zu.", + idx, signature_type_name, component_data->element - signature->elements); + } + } } static const unsigned int allowed_signature_phases[] = @@ -7631,8 +8470,8 @@ static const unsigned int allowed_signature_phases[] = [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, }; -static void vsir_validate_signature(struct validation_context *ctx, - const struct shader_signature *signature, enum vsir_signature_type signature_type) +static void vsir_validate_signature(struct validation_context *ctx, const struct shader_signature *signature, + struct validation_context_signature_data *signature_data, enum vsir_signature_type signature_type) { unsigned int i; @@ -7642,7 +8481,110 @@ static void vsir_validate_signature(struct validation_context *ctx, "Unexpected %s signature.", signature_type_names[signature_type]); for (i = 0; i < signature->element_count; ++i) - vsir_validate_signature_element(ctx, signature, signature_type, i); + vsir_validate_signature_element(ctx, signature, signature_data, signature_type, i); + + if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) + { + const struct signature_element *first_element, *element; + unsigned int expected_outer_count = 0; + unsigned int expected_inner_count = 0; + + switch (ctx->program->tess_domain) + { + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + expected_outer_count = 4; + expected_inner_count = 2; + break; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + expected_outer_count = 3; + expected_inner_count = 1; + break; + + case VKD3D_TESSELLATOR_DOMAIN_LINE: + expected_outer_count = 2; + expected_inner_count = 0; + 
break; + + default: + break; + } + + /* After I/O normalisation tessellation factors are merged in a single array. */ + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) + { + expected_outer_count = min(1, expected_outer_count); + expected_inner_count = min(1, expected_inner_count); + } + + first_element = NULL; + for (i = 0; i < expected_outer_count; ++i) + { + if (ctx->outer_tess_idxs[i] == ~0u) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Missing outer system value semantic %u.", i); + } + else + { + element = &signature->elements[ctx->outer_tess_idxs[i]]; + + if (!first_element) + { + first_element = element; + continue; + } + + if (element->register_index != first_element->register_index + i) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid register index %u for outer system value semantic %u, expected %u.", + element->register_index, i, first_element->register_index + i); + } + + if (element->mask != first_element->mask) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid mask %#x for outer system value semantic %u, expected %#x.", + element->mask, i, first_element->mask); + } + } + } + + first_element = NULL; + for (i = 0; i < expected_inner_count; ++i) + { + if (ctx->inner_tess_idxs[i] == ~0u) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Missing inner system value semantic %u.", i); + } + else + { + element = &signature->elements[ctx->inner_tess_idxs[i]]; + + if (!first_element) + { + first_element = element; + continue; + } + + if (element->register_index != first_element->register_index + i) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid register index %u for inner system value semantic %u, expected %u.", + element->register_index, i, first_element->register_index + i); + } + + if (element->mask != first_element->mask) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid 
mask %#x for inner system value semantic %u, expected %#x.", + element->mask, i, first_element->mask); + } + } + } + } } static const char *name_from_cf_type(enum vsir_control_flow_type type) @@ -7754,6 +8696,206 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, instruction->declaration.max_tessellation_factor); } +static void vsir_validate_dcl_index_range(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + unsigned int i, j, base_register_idx, effective_write_mask = 0, control_point_count, first_component = UINT_MAX; + const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; + enum vkd3d_shader_sysval_semantic sysval = ~0u; + const struct shader_signature *signature; + struct vsir_io_register_data io_reg_data; + bool has_control_point; + + if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "DCL_INDEX_RANGE is not allowed with fully normalised input/output."); + return; + } + + if (range->dst.modifiers != VKD3DSPDM_NONE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, + "Invalid modifier %#x on a DCL_INDEX_RANGE destination parameter.", range->dst.modifiers); + + if (range->dst.shift != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, + "Invalid shift %u on a DCL_INDEX_RANGE destination parameter.", range->dst.shift); + + if (!vsir_get_io_register_data(ctx, range->dst.reg.type, &io_reg_data)) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in DCL_INDEX_RANGE instruction.", + range->dst.reg.type); + return; + } + + signature = io_reg_data.signature; + has_control_point = io_reg_data.flags & CONTROL_POINT_BIT; + control_point_count = io_reg_data.control_point_count; + + if (range->dst.reg.idx_count != 1 + !!has_control_point) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + 
"Invalid index count %u in DCL_INDEX_RANGE instruction.", + range->dst.reg.idx_count); + return; + } + + if (range->dst.reg.idx[0].rel_addr || (has_control_point && range->dst.reg.idx[1].rel_addr)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid relative address in DCL_INDEX_RANGE instruction."); + + if (has_control_point) + { + if (range->dst.reg.idx[0].offset != control_point_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Invalid control point index %u in DCL_INDEX_RANGE instruction, expected %u.", + range->dst.reg.idx[0].offset, control_point_count); + } + + base_register_idx = range->dst.reg.idx[1].offset; + } + else + { + base_register_idx = range->dst.reg.idx[0].offset; + } + + if (range->register_count < 2) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE, + "Invalid register count %u in DCL_INDEX_RANGE instruction, expected at least 2.", + range->register_count); + return; + } + + /* Check that for each register in the range the write mask intersects at + * most one (and possibly zero) signature elements. Keep track of the union + * of all signature element masks. 
*/ + for (i = 0; i < range->register_count; ++i) + { + bool found = false; + + for (j = 0; j < signature->element_count; ++j) + { + const struct signature_element *element = &signature->elements[j]; + + if (base_register_idx + i != element->register_index || !(range->dst.write_mask & element->mask)) + continue; + + if (found) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.", + range->dst.write_mask); + + found = true; + + if (first_component == UINT_MAX) + first_component = vsir_write_mask_get_component_idx(element->mask); + else if (first_component != vsir_write_mask_get_component_idx(element->mask)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Signature masks are not left-aligned within a DCL_INDEX_RANGE."); + + effective_write_mask |= element->mask; + } + } + + /* Check again to have at most one intersection for each register, but this + * time using the effective write mask. Also check that we have stabilized, + * i.e., the effective write mask now contains all the signature element + * masks. This is important for being able to merge all the signature elements + * in a single one without conflicts (there is no hard reason why we + * couldn't support an effective write mask that stabilizes after more + * iterations, but the code would be more complicated, and we avoid that if + * we can). 
*/ + for (i = 0; i < range->register_count; ++i) + { + bool found = false; + + for (j = 0; j < signature->element_count; ++j) + { + const struct signature_element *element = &signature->elements[j]; + + if (base_register_idx + i != element->register_index || !(effective_write_mask & element->mask)) + continue; + + if (element->sysval_semantic != VKD3D_SHADER_SV_NONE + && !vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE.", + element->sysval_semantic); + + if (sysval == ~0u) + { + sysval = element->sysval_semantic; + /* Line density and line detail can be arrayed together. */ + if (sysval == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + sysval = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; + } + else + { + if (sysval != element->sysval_semantic) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Inconsistent sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE, " + "%#x was already seen.", + element->sysval_semantic, sysval); + } + + if (found) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.", + range->dst.write_mask); + + found = true; + + if (~effective_write_mask & element->mask) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid write mask %#x on a signature element touched by a " + "DCL_INDEX_RANGE instruction with effective write mask %#x.", + element->mask, effective_write_mask); + + if (first_component != vsir_write_mask_get_component_idx(element->mask)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Signature element masks are not left-aligned within a DCL_INDEX_RANGE."); + } + } + + VKD3D_ASSERT(sysval != ~0u); +} + +static void vsir_validate_dcl_input(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + 
switch (instruction->declaration.dst.reg.type) + { + /* Signature input registers. */ + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + case VKD3DSPR_OUTCONTROLPOINT: + case VKD3DSPR_PATCHCONST: + /* Non-signature input registers. */ + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT.", + instruction->declaration.dst.reg.type); + } +} + static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7763,6 +8905,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, instruction->declaration.primitive_type.type); } +static void vsir_validate_dcl_input_ps(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS.", + instruction->declaration.dst.reg.type); + } +} + +static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS_SGV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void 
vsir_validate_dcl_input_ps_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_sgv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_SGV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_input_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_PATCHCONST: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_INPUT_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + +static void vsir_validate_dcl_output(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.dst.reg.type) + { + /* Signature output registers. */ + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + /* Non-signature output registers. 
*/ + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + case VKD3DSPR_OUTSTENCILREF: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_OUTPUT.", + instruction->declaration.dst.reg.type); + } +} + static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7772,6 +9013,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte instruction->declaration.count); } +static void vsir_validate_dcl_output_siv(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ + switch (instruction->declaration.register_semantic.reg.reg.type) + { + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + break; + + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid register type %#x in instruction DCL_OUTPUT_SIV.", + instruction->declaration.register_semantic.reg.reg.type); + } +} + static void vsir_validate_dcl_output_topology(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { @@ -7801,6 +9058,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); + + if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", + instruction->declaration.tessellator_domain, ctx->program->tess_domain); } static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, @@ -8063,8 
+9325,17 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, + [VKD3DSIH_DCL_INDEX_RANGE] = {0, 0, vsir_validate_dcl_index_range}, + [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, + [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, + [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv}, + [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv}, + [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv}, + [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv}, + [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output}, [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, + [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv}, [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, @@ -8177,6 +9448,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, .invalid_instruction_idx = true, + .outer_tess_idxs[0] = ~0u, + .outer_tess_idxs[1] = ~0u, + .outer_tess_idxs[2] = ~0u, + .outer_tess_idxs[3] = ~0u, + .inner_tess_idxs[0] = ~0u, + .inner_tess_idxs[1] = ~0u, }; unsigned int i; @@ -8187,12 +9464,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c { case VKD3D_SHADER_TYPE_HULL: case VKD3D_SHADER_TYPE_DOMAIN: + if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID + || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(&ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Invalid tessellation domain %#x.", program->tess_domain); break; default: if (program->patch_constant_signature.element_count != 0) validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Patch constant signature is only valid for hull and domain shaders."); + + if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Invalid tessellation domain %#x.", program->tess_domain); } switch (program->shader_version.type) @@ -8226,9 +9511,47 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c program->output_control_point_count); } - vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); - vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); - vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); + vsir_validate_signature(&ctx, &program->input_signature, + &ctx.input_signature_data, SIGNATURE_TYPE_INPUT); + vsir_validate_signature(&ctx, &program->output_signature, + &ctx.output_signature_data, SIGNATURE_TYPE_OUTPUT); + vsir_validate_signature(&ctx, &program->patch_constant_signature, + &ctx.patch_constant_signature_data, SIGNATURE_TYPE_PATCH_CONSTANT); + + for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) + { + if (!bitmap_is_set(program->io_dcls, i)) + continue; + + switch (i) + { + /* Input registers */ + case VKD3DSPR_PRIMID: + case VKD3DSPR_FORKINSTID: + case VKD3DSPR_JOININSTID: + case VKD3DSPR_THREADID: + case VKD3DSPR_THREADGROUPID: + case VKD3DSPR_LOCALTHREADID: + case VKD3DSPR_LOCALTHREADINDEX: + case VKD3DSPR_COVERAGE: + case VKD3DSPR_TESSCOORD: + case VKD3DSPR_OUTPOINTID: + case VKD3DSPR_GSINSTID: + case VKD3DSPR_WAVELANECOUNT: + case VKD3DSPR_WAVELANEINDEX: + /* Output registers */ + case VKD3DSPR_DEPTHOUT: + case VKD3DSPR_SAMPLEMASK: + case VKD3DSPR_DEPTHOUTGE: + case VKD3DSPR_DEPTHOUTLE: + 
case VKD3DSPR_OUTSTENCILREF: + break; + + default: + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid input/output declaration %u.", i); + } + } if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) goto fail; @@ -8318,6 +9641,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin if (program->shader_version.major <= 2) vsir_transform(&ctx, vsir_program_add_diffuse_output); + /* For vsir_program_insert_fragment_fog(). */ + vsir_transform(&ctx, vsir_program_add_fog_input); + + /* For vsir_program_insert_vertex_fog(). */ + vsir_transform(&ctx, vsir_program_add_fog_output); + return ctx.result; } @@ -8372,6 +9701,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t vsir_transform(&ctx, vsir_program_insert_point_size); vsir_transform(&ctx, vsir_program_insert_point_size_clamp); vsir_transform(&ctx, vsir_program_insert_point_coord); + vsir_transform(&ctx, vsir_program_insert_fragment_fog); + vsir_transform(&ctx, vsir_program_insert_vertex_fog); if (TRACE_ON()) vsir_program_trace(program); diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index df3edeaa4e6..bb85e62e94c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -41,6 +41,8 @@ struct msl_generator const char *prefix; bool failed; + bool write_depth; + const struct vkd3d_shader_interface_info *interface_info; const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; }; @@ -153,6 +155,72 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, msl_print_register_datatype(buffer, gen, reg->data_type); break; + case VKD3DSPR_DEPTHOUT: + if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled depth output in shader type #%x.", + gen->program->shader_version.type); + vkd3d_string_buffer_printf(buffer, 
"o_depth"); + break; + + case VKD3DSPR_IMMCONST: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + switch (reg->data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); + vkd3d_string_buffer_printf(buffer, "", reg->data_type); + break; + } + break; + + case VSIR_DIMENSION_VEC4: + switch (reg->data_type) + { + case VKD3D_DATA_INT: + vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + case VKD3D_DATA_UINT: + vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); + break; + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", + reg->u.immconst_u32[0], reg->u.immconst_u32[1], + reg->u.immconst_u32[2], reg->u.immconst_u32[3]); + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); + vkd3d_string_buffer_printf(buffer, "", reg->data_type); + break; + } + break; + + default: + vkd3d_string_buffer_printf(buffer, "", reg->dimension); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + break; + } + break; + case VKD3DSPR_CONSTBUFFER: if (reg->idx_count != 3) { @@ -215,19 +283,43 @@ static void 
msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) { const struct vkd3d_shader_register *reg = &vsir_src->reg; + struct vkd3d_string_buffer *str; msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); if (reg->non_uniform) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled 'non-uniform' modifier."); - if (vsir_src->modifiers) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - msl_print_register_name(msl_src->str, gen, reg); + if (!vsir_src->modifiers) + str = msl_src->str; + else + str = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(str, gen, reg); if (reg->dimension == VSIR_DIMENSION_VEC4) - msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); + msl_print_swizzle(str, vsir_src->swizzle, mask); + + switch (vsir_src->modifiers) + { + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_NEG: + vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer); + break; + case VKD3DSPSM_ABS: + vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer); + break; + default: + vkd3d_string_buffer_printf(msl_src->str, "(%s)", + vsir_src->modifiers, str->buffer); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + break; + } + + if (str != msl_src->str) + vkd3d_string_buffer_release(&gen->string_buffers, str); } static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) @@ -253,7 +345,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); - msl_print_write_mask(msl_dst->mask, write_mask); + if (vsir_dst->reg.dimension == VSIR_DIMENSION_VEC4) + 
msl_print_write_mask(msl_dst->mask, write_mask); return write_mask; } @@ -261,22 +354,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) { + uint32_t modifiers = dst->vsir->modifiers; va_list args; if (dst->vsir->shift) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); - if (dst->vsir->modifiers) + if (modifiers & ~VKD3DSPDM_SATURATE) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); + "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers); msl_print_indent(gen->buffer, gen->indent); vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + if (modifiers & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(gen->buffer, "saturate("); + va_start(args, format); vkd3d_string_buffer_vprintf(gen->buffer, format, args); va_end(args); + if (modifiers & VKD3DSPDM_SATURATE) + vkd3d_string_buffer_printf(gen->buffer, ")"); + vkd3d_string_buffer_printf(gen->buffer, ";\n"); } @@ -288,6 +388,164 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } +static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct msl_src src[2]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + + msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + 
msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, uint32_t src_mask) +{ + unsigned int component_count; + struct msl_src src[2]; + struct msl_dst dst; + uint32_t dst_mask; + + dst_mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], src_mask); + msl_src_init(&src[1], gen, &ins->src[1], src_mask); + + if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1) + msl_print_assignment(gen, &dst, "float%u(dot(%s, %s))", + component_count, src[0].str->buffer, src[1].str->buffer); + else + msl_print_assignment(gen, &dst, "dot(%s, %s)", src[0].str->buffer, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct vkd3d_string_buffer *args; + struct msl_src src; + struct msl_dst dst; + unsigned int i; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + args = vkd3d_string_buffer_get(&gen->string_buffers); + + for (i = 0; i < ins->src_count; ++i) + { + msl_src_init(&src, gen, &ins->src[i], mask); + vkd3d_string_buffer_printf(args, "%s%s", i ? 
", " : "", src.str->buffer); + msl_src_cleanup(&src, &gen->string_buffers); + } + + msl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, args); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + unsigned int mask_size; + struct msl_src src[2]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))", + mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer); + else + msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", + src[0].str->buffer, op, src[1].str->buffer); + + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) +{ + unsigned int component_count; + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); + else + msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void msl_end_block(struct msl_generator *gen) +{ + --gen->indent; + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "}\n"); +} + +static void 
msl_begin_block(struct msl_generator *gen) +{ + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "{\n"); + ++gen->indent; +} + +static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const char *condition; + struct msl_src src; + + msl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); + + msl_print_indent(gen->buffer, gen->indent); + condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? "bool" : "!bool"; + vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + + msl_begin_block(gen); +} + +static void msl_else(struct msl_generator *gen) +{ + msl_end_block(gen); + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "else\n"); + msl_begin_block(gen); +} + +static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) +{ + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + msl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { struct msl_src src; @@ -303,6 +561,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc msl_dst_cleanup(&dst, &gen->string_buffers); } +static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + unsigned int component_count; + struct msl_src src[3]; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src[0], gen, &ins->src[0], mask); + msl_src_init(&src[1], gen, &ins->src[1], mask); + msl_src_init(&src[2], gen, &ins->src[2], mask); + + if ((component_count = 
vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))", + src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); + else + msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))", + src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); + + msl_src_cleanup(&src[2], &gen->string_buffers); + msl_src_cleanup(&src[1], &gen->string_buffers); + msl_src_cleanup(&src[0], &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { msl_print_indent(gen->buffer, gen->indent); @@ -315,17 +598,119 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d switch (ins->opcode) { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_ADD: + msl_binop(gen, ins, "+"); + break; + case VKD3DSIH_AND: + msl_binop(gen, ins, "&"); + break; case VKD3DSIH_NOP: break; + case VKD3DSIH_DIV: + msl_binop(gen, ins, "/"); + break; + case VKD3DSIH_DP2: + msl_dot(gen, ins, vkd3d_write_mask_from_component_count(2)); + break; + case VKD3DSIH_DP3: + msl_dot(gen, ins, vkd3d_write_mask_from_component_count(3)); + break; + case VKD3DSIH_DP4: + msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); + break; + case VKD3DSIH_ELSE: + msl_else(gen); + break; + case VKD3DSIH_ENDIF: + msl_end_block(gen); + break; + case VKD3DSIH_IEQ: + msl_relop(gen, ins, "=="); + break; + case VKD3DSIH_EXP: + msl_intrinsic(gen, ins, "exp2"); + break; + case VKD3DSIH_FRC: + msl_intrinsic(gen, ins, "fract"); + break; + case VKD3DSIH_FTOI: + msl_cast(gen, ins, "int"); + break; + case VKD3DSIH_FTOU: + msl_cast(gen, ins, "uint"); + break; + case VKD3DSIH_GEO: + msl_relop(gen, ins, ">="); + break; + case VKD3DSIH_IF: + msl_if(gen, ins); + break; + case VKD3DSIH_ISHL: + msl_binop(gen, ins, "<<"); + break; + case VKD3DSIH_ISHR: + case VKD3DSIH_USHR: + msl_binop(gen, ins, ">>"); + break; + 
case VKD3DSIH_LTO: + msl_relop(gen, ins, "<"); + break; + case VKD3DSIH_MAD: + msl_intrinsic(gen, ins, "fma"); + break; + case VKD3DSIH_MAX: + msl_intrinsic(gen, ins, "max"); + break; + case VKD3DSIH_MIN: + msl_intrinsic(gen, ins, "min"); + break; + case VKD3DSIH_INE: + case VKD3DSIH_NEU: + msl_relop(gen, ins, "!="); + break; + case VKD3DSIH_ITOF: + case VKD3DSIH_UTOF: + msl_cast(gen, ins, "float"); + break; + case VKD3DSIH_LOG: + msl_intrinsic(gen, ins, "log2"); + break; case VKD3DSIH_MOV: msl_mov(gen, ins); break; + case VKD3DSIH_MOVC: + msl_movc(gen, ins); + break; + case VKD3DSIH_MUL: + msl_binop(gen, ins, "*"); + break; + case VKD3DSIH_NOT: + msl_unary_op(gen, ins, "~"); + break; + case VKD3DSIH_OR: + msl_binop(gen, ins, "|"); + break; case VKD3DSIH_RET: msl_ret(gen, ins); break; + case VKD3DSIH_ROUND_NE: + msl_intrinsic(gen, ins, "rint"); + break; + case VKD3DSIH_ROUND_NI: + msl_intrinsic(gen, ins, "floor"); + break; + case VKD3DSIH_ROUND_PI: + msl_intrinsic(gen, ins, "ceil"); + break; + case VKD3DSIH_ROUND_Z: + msl_intrinsic(gen, ins, "trunc"); + break; + case VKD3DSIH_RSQ: + msl_intrinsic(gen, ins, "rsqrt"); + break; + case VKD3DSIH_SQRT: + msl_intrinsic(gen, ins, "sqrt"); + break; default: msl_unhandled(gen, ins); break; @@ -489,6 +874,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) if (e->sysval_semantic) { + if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) + { + if (type != VKD3D_SHADER_TYPE_PIXEL) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", type); + + msl_print_indent(gen->buffer, 1); + vkd3d_string_buffer_printf(buffer, "bool is_front_face [[front_facing]];\n"); + continue; + } msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); continue; @@ -501,13 +896,6 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) 
continue; } - if (e->interpolation_mode != VKD3DSIM_NONE) - { - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); - continue; - } - if(e->register_count > 1) { msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, @@ -551,6 +939,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) break; } + switch (e->interpolation_mode) + { + /* The default interpolation attribute. */ + case VKD3DSIM_LINEAR: + case VKD3DSIM_NONE: + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); + break; + } + vkd3d_string_buffer_printf(buffer, ";\n"); } @@ -602,6 +1002,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) { e = &signature->elements[i]; + if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) + { + gen->write_depth = true; + msl_print_indent(gen->buffer, 1); + vkd3d_string_buffer_printf(buffer, "float shader_out_depth [[depth(any)]];\n"); + continue; + } + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) continue; @@ -690,6 +1098,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen) vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); msl_print_write_mask(buffer, e->mask); } + else if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) + { + vkd3d_string_buffer_printf(buffer, ".u = uint4(input.is_front_face ? 
0xffffffffu : 0u, 0, 0, 0)"); + } else { vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); @@ -711,6 +1123,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) { e = &signature->elements[i]; + if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) + { + vkd3d_string_buffer_printf(buffer, " output.shader_out_depth = shader_out_depth;\n"); + continue; + } + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) continue; @@ -770,9 +1188,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, " float shader_out_depth;\n"); + msl_generate_entrypoint_prologue(gen); vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); if (gen->descriptor_info->descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); vkd3d_string_buffer_printf(gen->buffer, ");\n"); @@ -790,6 +1213,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + vkd3d_string_buffer_printf(gen->buffer, "#include \n\n"); + vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); if (gen->program->global_flags) msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, @@ -808,6 +1233,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader "void %s_main(thread vkd3d_vec4 *v, " "thread vkd3d_vec4 *o", gen->prefix); + if (gen->write_depth) + vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); if (gen->descriptor_info->descriptor_count) vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); @@ -887,7 +1314,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) return ret; - VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 4a8d0fddae1..d167415c356 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index bdfd632ad12..a7b935543a0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -34,6 +34,32 @@ # include "vulkan/GLSL.std.450.h" #endif /* HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H */ +#define VKD3D_SPIRV_VERSION_1_0 0x00010000 +#define VKD3D_SPIRV_VERSION_1_3 0x00010300 +#define VKD3D_SPIRV_GENERATOR_ID 18 +#define 
VKD3D_SPIRV_GENERATOR_VERSION 14 +#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) +#ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER +# define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0 +#endif + +#define VKD3D_SPIRV_HEADER_SIZE 5 + +#define VKD3D_SPIRV_VERSION_MAJOR_SHIFT 16u +#define VKD3D_SPIRV_VERSION_MAJOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MAJOR_SHIFT) +#define VKD3D_SPIRV_VERSION_MINOR_SHIFT 8u +#define VKD3D_SPIRV_VERSION_MINOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MINOR_SHIFT) + +#define VKD3D_SPIRV_GENERATOR_ID_SHIFT 16u +#define VKD3D_SPIRV_GENERATOR_ID_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_ID_SHIFT) +#define VKD3D_SPIRV_GENERATOR_VERSION_SHIFT 0u +#define VKD3D_SPIRV_GENERATOR_VERSION_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_VERSION_SHIFT) + +#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT 16u +#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT) +#define VKD3D_SPIRV_INSTRUCTION_OP_SHIFT 0u +#define VKD3D_SPIRV_INSTRUCTION_OP_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_OP_SHIFT) + #ifdef HAVE_SPIRV_TOOLS # include "spirv-tools/libspirv.h" @@ -82,7 +108,7 @@ static uint32_t get_binary_to_text_options(enum vkd3d_shader_compile_option_form return out; } -static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, +static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment, enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) { @@ -143,20 +169,6 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co return result; } -static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment) -{ - static const enum vkd3d_shader_compile_option_formatting_flags formatting - = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | 
VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; - struct vkd3d_shader_code text; - - if (!vkd3d_spirv_binary_to_text(spirv, environment, formatting, &text)) - { - vkd3d_shader_trace_text(text.code, text.size); - vkd3d_shader_free_shader_code(&text); - } -} - static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) { @@ -180,14 +192,13 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc #else -static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, +static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment, enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) { return VKD3D_ERROR; } -static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment) {} + static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) { @@ -196,6 +207,312 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc #endif /* HAVE_SPIRV_TOOLS */ +struct spirv_colours +{ + const char *reset; + const char *comment; +}; + +struct spirv_parser +{ + struct vkd3d_string_buffer_cache string_buffers; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; + enum vkd3d_shader_compile_option_formatting_flags formatting; + struct spirv_colours colours; + bool failed; + + const uint32_t *code; + size_t pos; + size_t size; + + struct vkd3d_string_buffer *text; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_error(struct spirv_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) 
+{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); + va_end(args); + parser->failed = true; +} + +static uint32_t spirv_parser_read_u32(struct spirv_parser *parser) +{ + if (parser->pos >= parser->size) + { + parser->failed = true; + return 0; + } + + return parser->code[parser->pos++]; +} + +static void VKD3D_PRINTF_FUNC(2, 3) spirv_parser_print_comment(struct spirv_parser *parser, const char *format, ...) +{ + va_list args; + + if (!parser->text) + return; + + va_start(args, format); + vkd3d_string_buffer_printf(parser->text, "%s; ", parser->colours.comment); + vkd3d_string_buffer_vprintf(parser->text, format, args); + vkd3d_string_buffer_printf(parser->text, "%s\n", parser->colours.reset); + va_end(args); +} + +static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t magic) +{ + unsigned int id, version; + const char *name; + + id = (magic & VKD3D_SPIRV_GENERATOR_ID_MASK) >> VKD3D_SPIRV_GENERATOR_ID_SHIFT; + version = (magic & VKD3D_SPIRV_GENERATOR_VERSION_MASK) >> VKD3D_SPIRV_GENERATOR_VERSION_SHIFT; + + switch (id) + { + case VKD3D_SPIRV_GENERATOR_ID: + name = "Wine VKD3D Shader Compiler"; + break; + + default: + name = NULL; + break; + } + + if (name) + spirv_parser_print_comment(parser, "Generator: %s; %u", name, version); + else + spirv_parser_print_comment(parser, "Generator: Unknown (%#x); %u", id, version); +} + +static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser) +{ + uint32_t magic, version, generator, bound, schema; + unsigned int major, minor; + + if (parser->pos > parser->size || parser->size - parser->pos < VKD3D_SPIRV_HEADER_SIZE) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Unexpected end while reading the SPIR-V header."); + return VKD3D_ERROR_INVALID_SHADER; + } + + magic = spirv_parser_read_u32(parser); + version = spirv_parser_read_u32(parser); + generator = 
spirv_parser_read_u32(parser); + bound = spirv_parser_read_u32(parser); + schema = spirv_parser_read_u32(parser); + + if (magic != SpvMagicNumber) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid magic number %#08x.", magic); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (version & ~(VKD3D_SPIRV_VERSION_MAJOR_MASK | VKD3D_SPIRV_VERSION_MINOR_MASK)) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid version token %#08x.", version); + return VKD3D_ERROR_INVALID_SHADER; + } + + major = (version & VKD3D_SPIRV_VERSION_MAJOR_MASK) >> VKD3D_SPIRV_VERSION_MAJOR_SHIFT; + minor = (version & VKD3D_SPIRV_VERSION_MINOR_MASK) >> VKD3D_SPIRV_VERSION_MINOR_SHIFT; + if (major != 1 || minor > 0) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unable to parse SPIR-V version %u.%u.", major, minor); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + if (!bound) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Invalid zero id bound."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (schema) + { + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Unable to handle instruction schema %#08x.", schema); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + if (parser->formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER) + { + spirv_parser_print_comment(parser, "SPIR-V"); + spirv_parser_print_comment(parser, "Version: %u.%u", major, minor); + spirv_parser_print_generator(parser, generator); + spirv_parser_print_comment(parser, "Bound: %u", bound); + spirv_parser_print_comment(parser, "Schema: %u", schema); + } + + return VKD3D_OK; +} + +static enum vkd3d_result spirv_parser_parse_instruction(struct spirv_parser *parser) +{ + struct vkd3d_string_buffer *buffer; + uint16_t op, count; + unsigned int i; + uint32_t word; + + word = spirv_parser_read_u32(parser); + count = (word & VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK) >> 
VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT;
+    op = (word & VKD3D_SPIRV_INSTRUCTION_OP_MASK) >> VKD3D_SPIRV_INSTRUCTION_OP_SHIFT;
+
+    if (!count)
+    {
+        spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER,
+                "Invalid word count %u.", count);
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+
+    --count;
+    buffer = vkd3d_string_buffer_get(&parser->string_buffers);
+    for (i = 0; i < count; ++i)
+    {
+        word = spirv_parser_read_u32(parser);
+        vkd3d_string_buffer_printf(buffer, " 0x%08x", word);
+    }
+    spirv_parser_print_comment(parser, "<unrecognised instruction %#x>%s", op, buffer->buffer);
+    vkd3d_string_buffer_release(&parser->string_buffers, buffer);
+
+    spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED,
+            "Unrecognised instruction %#x.", op);
+
+    return VKD3D_OK;
+}
+
+static enum vkd3d_result spirv_parser_parse(struct spirv_parser *parser, struct vkd3d_shader_code *text)
+{
+    enum vkd3d_result ret;
+
+    if (text)
+        parser->text = vkd3d_string_buffer_get(&parser->string_buffers);
+
+    if ((ret = spirv_parser_read_header(parser)) < 0)
+        goto fail;
+    while (parser->pos < parser->size)
+    {
+        ++parser->location.line;
+        if ((ret = spirv_parser_parse_instruction(parser)) < 0)
+            goto fail;
+    }
+
+    if (parser->failed)
+    {
+        ret = VKD3D_ERROR_INVALID_SHADER;
+        goto fail;
+    }
+
+    if (text)
+        vkd3d_shader_code_from_string_buffer(text, parser->text);
+
+    return VKD3D_OK;
+
+fail:
+    if (parser->text)
+    {
+        if (TRACE_ON())
+            vkd3d_string_buffer_trace(parser->text);
+        vkd3d_string_buffer_release(&parser->string_buffers, parser->text);
+    }
+    return ret;
+}
+
+static void spirv_parser_cleanup(struct spirv_parser *parser)
+{
+    vkd3d_string_buffer_cache_cleanup(&parser->string_buffers);
+}
+
+static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const struct vkd3d_shader_code *source,
+        const char *source_name, enum vkd3d_shader_compile_option_formatting_flags formatting,
+        struct vkd3d_shader_message_context *message_context)
+{
+    static const struct spirv_colours no_colours =
+    {
+ .reset = "", + .comment = "", + }; + static const struct spirv_colours colours = + { + .reset = "\x1b[m", + .comment = "\x1b[36m", + }; + + memset(parser, 0, sizeof(*parser)); + parser->location.source_name = source_name; + parser->message_context = message_context; + vkd3d_string_buffer_cache_init(&parser->string_buffers); + + if (source->size % 4) + { + vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); + spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, + "Shader size %zu is not a multiple of four.", source->size); + return VKD3D_ERROR_INVALID_SHADER; + } + + parser->formatting = formatting; + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR) + parser->colours = colours; + else + parser->colours = no_colours; + parser->code = source->code; + parser->size = source->size / 4; + + return VKD3D_OK; +} + +static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, + const char *source_name, enum vkd3d_shader_spirv_environment environment, + enum vkd3d_shader_compile_option_formatting_flags formatting, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct spirv_parser parser; + enum vkd3d_result ret; + + if (!VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + return spirv_tools_binary_to_text(spirv, environment, formatting, out); + + MESSAGE("Creating a SPIR-V parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); + + if ((ret = spirv_parser_init(&parser, spirv, source_name, formatting, message_context)) < 0) + return ret; + + ret = spirv_parser_parse(&parser, out); + + spirv_parser_cleanup(&parser); + + return ret; +} + +static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) +{ + static const enum vkd3d_shader_compile_option_formatting_flags formatting + = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; + struct vkd3d_shader_message_context message_context; + struct vkd3d_shader_code text; + + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + + if (!vkd3d_spirv_binary_to_text(spirv, NULL, environment, formatting, &text, &message_context)) + { + vkd3d_shader_trace_text(text.code, text.size); + vkd3d_shader_free_shader_code(&text); + } + + vkd3d_shader_message_context_cleanup(&message_context); +} + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index) { @@ -247,12 +564,6 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d } } -#define VKD3D_SPIRV_VERSION_1_0 0x00010000 -#define VKD3D_SPIRV_VERSION_1_3 0x00010300 -#define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 14 -#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) - struct vkd3d_spirv_stream { uint32_t *words; @@ -2406,6 +2717,7 @@ struct vkd3d_hull_shader_variables struct ssa_register_info { enum vkd3d_data_type data_type; + uint8_t write_mask; uint32_t id; }; @@ -2471,7 +2783,6 @@ struct spirv_compiler bool emit_point_size; enum vkd3d_shader_opcode phase; - bool emit_default_control_point_phase; struct vkd3d_shader_phase control_point_phase; struct vkd3d_shader_phase patch_constant_phase; @@ -3316,13 +3627,19 @@ static uint32_t 
spirv_compiler_emit_variable(struct spirv_compiler *compiler, static const struct vkd3d_spec_constant_info { enum vkd3d_shader_parameter_name name; - uint32_t default_value; + union + { + uint32_t u; + float f; + } default_value; const char *debug_name; } vkd3d_shader_parameters[] = { - {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, - {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, + {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, {.u = 1}, "sample_count"}, + {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, {.f = 0.0f}, "alpha_test_ref"}, + {VKD3D_SHADER_PARAMETER_NAME_FOG_END, {.f = 1.0f}, "fog_end"}, + {VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, {.f = 1.0f}, "fog_scale"}, }; static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) @@ -3383,7 +3700,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile const struct vkd3d_spec_constant_info *info; info = get_spec_constant_info(name); - default_value = info ? info->default_value : 0; + default_value = info ? info->default_value.u : 0; scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); @@ -3574,6 +3891,24 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, register_info->is_aggregate = false; return true; } + else if (reg->type == VKD3DSPR_SSA) + { + const struct ssa_register_info *ssa = &compiler->ssa_register_info[reg->idx[0].offset]; + + if (!ssa->id) + { + /* Should only be from a missing instruction implementation. 
*/ + VKD3D_ASSERT(compiler->failed); + return 0; + } + + memset(register_info, 0, sizeof(*register_info)); + register_info->id = ssa->id; + register_info->storage_class = SpvStorageClassMax; + register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type); + register_info->write_mask = ssa->write_mask; + return true; + } vkd3d_symbol_make_register(®_symbol, reg); if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) @@ -4181,67 +4516,14 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil return const_id; } -static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg) -{ - VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); - VKD3D_ASSERT(reg->idx_count == 1); - return &compiler->ssa_register_info[reg->idx[0].offset]; -} - static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t val_id) + const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) { unsigned int i = reg->idx[0].offset; VKD3D_ASSERT(i < compiler->ssa_register_count); compiler->ssa_register_info[i].data_type = reg->data_type; compiler->ssa_register_info[i].id = val_id; -} - -static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type, - uint32_t swizzle) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type reg_component_type; - const struct ssa_register_info *ssa; - unsigned int component_idx; - uint32_t type_id, val_id; - - ssa = spirv_compiler_get_ssa_register_info(compiler, reg); - val_id = ssa->id; - if (!val_id) - { - /* Should only be from a missing instruction implementation. 
*/ - VKD3D_ASSERT(compiler->failed); - return 0; - } - VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); - - reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); - - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { - if (component_type != reg_component_type) - { - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } - - return val_id; - } - - if (component_type != reg_component_type) - { - /* Required for resource loads with sampled type int, because DXIL has no signedness. - * Only 128-bit vector sizes are used. */ - type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); - } - - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - component_idx = vsir_swizzle_get_component(swizzle, 0); - return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); + compiler->ssa_register_info[i].write_mask = write_mask; } static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, @@ -4267,9 +4549,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, component_count = vsir_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); - if (reg->type == VKD3DSPR_SSA) - return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle); - if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) { type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); @@ -4294,9 +4573,9 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask)); val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); + swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; } - swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); @@ -4497,7 +4776,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, if (reg->type == VKD3DSPR_SSA) { - spirv_compiler_set_ssa_register_info(compiler, reg, val_id); + spirv_compiler_set_ssa_register_info(compiler, reg, write_mask, val_id); return; } @@ -4883,35 +5162,36 @@ static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = static const struct { enum vkd3d_shader_register_type reg_type; + SpvStorageClass storage_class; struct vkd3d_spirv_builtin builtin; } vkd3d_register_builtins[] = { - {VKD3DSPR_THREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, - {VKD3DSPR_LOCALTHREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, - {VKD3DSPR_LOCALTHREADINDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, - {VKD3DSPR_THREADGROUPID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, + {VKD3DSPR_THREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, + {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, + {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, + {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, - {VKD3DSPR_GSINSTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, - {VKD3DSPR_OUTPOINTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + {VKD3DSPR_OUTPOINTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, - {VKD3DSPR_PRIMID, 
{VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + {VKD3DSPR_PRIMID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, - {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, - {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, + {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, - {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - {VKD3DSPR_DEPTHOUT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_DEPTHOUTGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, + {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, - {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, - {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, + {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, + 
{VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, }; static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, @@ -4970,14 +5250,18 @@ static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_sysval( } static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_register( - enum vkd3d_shader_register_type reg_type) + enum vkd3d_shader_register_type reg_type, SpvStorageClass *storage_class) { unsigned int i; for (i = 0; i < ARRAY_SIZE(vkd3d_register_builtins); ++i) { if (vkd3d_register_builtins[i].reg_type == reg_type) + { + if (storage_class) + *storage_class = vkd3d_register_builtins[i].storage_class; return &vkd3d_register_builtins[i].builtin; + } } return NULL; @@ -4990,7 +5274,7 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp if ((builtin = get_spirv_builtin_for_sysval(compiler, sysval))) return builtin; - if ((builtin = get_spirv_builtin_for_register(reg_type))) + if ((builtin = get_spirv_builtin_for_register(reg_type, NULL))) return builtin; if ((sysval != VKD3D_SHADER_SV_NONE && sysval != VKD3D_SHADER_SV_TARGET) @@ -5322,21 +5606,26 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, return input_id; } -static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, +static void spirv_compiler_emit_io_register(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_register *reg = &dst->reg; const struct vkd3d_spirv_builtin *builtin; struct vkd3d_symbol reg_symbol; + SpvStorageClass storage_class; + uint32_t write_mask, id; struct rb_entry *entry; - uint32_t write_mask; - uint32_t input_id; VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); VKD3D_ASSERT(reg->idx_count < 2); - if (!(builtin = get_spirv_builtin_for_register(reg->type))) + if (reg->type == 
VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + { + builtin = &vkd3d_output_point_size_builtin; + storage_class = SpvStorageClassOutput; + } + else if (!(builtin = get_spirv_builtin_for_register(reg->type, &storage_class))) { FIXME("Unhandled register %#x.\n", reg->type); return; @@ -5347,14 +5636,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) return; - input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassInput, 0); + id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, 0); write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - vkd3d_symbol_set_register_info(®_symbol, input_id, - SpvStorageClassInput, builtin->component_type, write_mask); + vkd3d_symbol_set_register_info(®_symbol, id, + storage_class, builtin->component_type, write_mask); reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, ®_symbol); - spirv_compiler_emit_register_debug_name(builder, input_id, reg); + spirv_compiler_emit_register_execution_mode(compiler, reg->type); + spirv_compiler_emit_register_debug_name(builder, id, reg); } static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, @@ -5458,41 +5748,6 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * } } -static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &dst->reg; - const struct vkd3d_spirv_builtin *builtin; - struct vkd3d_symbol reg_symbol; - uint32_t write_mask; - uint32_t output_id; - - VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); - VKD3D_ASSERT(reg->idx_count < 2); - - if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == 
VSIR_RASTOUT_POINT_SIZE) - { - builtin = &vkd3d_output_point_size_builtin; - } - else if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { - FIXME("Unhandled register %#x.\n", reg->type); - return; - } - - output_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); - - vkd3d_symbol_make_register(®_symbol, reg); - write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - vkd3d_symbol_set_register_info(®_symbol, output_id, - SpvStorageClassOutput, builtin->component_type, write_mask); - reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; - spirv_compiler_put_symbol(compiler, ®_symbol); - spirv_compiler_emit_register_execution_mode(compiler, reg->type); - spirv_compiler_emit_register_debug_name(builder, output_id, reg); -} - static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, const struct vkd3d_spirv_builtin *builtin, const unsigned int *array_sizes, unsigned int size_count) { @@ -5857,16 +6112,6 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * compiler->epilogue_function_id = 0; } -static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *compiler) -{ - struct vkd3d_shader_dst_param dst; - - memset(&dst, 0, sizeof(dst)); - vsir_register_init(&dst.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); - dst.write_mask = VKD3DSP_WRITEMASK_0; - spirv_compiler_emit_input_register(compiler, &dst); -} - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; @@ -5879,7 +6124,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp break; case VKD3D_SHADER_TYPE_HULL: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); - spirv_compiler_emit_hull_shader_builtins(compiler); break; case VKD3D_SHADER_TYPE_DOMAIN: 
vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); @@ -6699,27 +6943,6 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); } -static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - - /* INPUT and PATCHCONST are handled in spirv_compiler_emit_io_declarations(). - * OUTPOINTID is handled in spirv_compiler_emit_hull_shader_builtins(). */ - if (dst->reg.type != VKD3DSPR_INPUT && dst->reg.type != VKD3DSPR_PATCHCONST - && dst->reg.type != VKD3DSPR_OUTPOINTID) - spirv_compiler_emit_input_register(compiler, dst); -} - -static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - - if (dst->reg.type != VKD3DSPR_OUTPUT && dst->reg.type != VKD3DSPR_PATCHCONST) - spirv_compiler_emit_output_register(compiler, dst); -} - static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -6822,15 +7045,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler compiler->spirv_builder.invocation_count = instruction->declaration.count; } -static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, + enum vkd3d_tessellator_domain domain) { - enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; SpvExecutionMode mode; - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) - return; - switch (domain) { case 
VKD3D_TESSELLATOR_DOMAIN_LINE: @@ -6916,15 +7135,10 @@ static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compile SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); } -static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); - static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) - spirv_compiler_emit_default_control_point_phase(compiler); - vkd3d_spirv_build_op_function_end(builder); if (is_in_control_point_phase(compiler)) @@ -6969,9 +7183,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, phase->function_id = function_id; /* The insertion location must be set after the label is emitted. */ phase->function_location = 0; - - if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - compiler->emit_default_control_point_phase = instruction->flags; } static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) @@ -7000,63 +7211,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) } } -static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->output_signature; - const struct shader_signature *input_signature = &compiler->input_signature; - uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; - struct vkd3d_shader_src_param invocation; - struct vkd3d_shader_register input_reg; - unsigned int component_count; - unsigned int i; - - vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - spirv_compiler_initialise_block(compiler); - invocation_id = spirv_compiler_emit_load_invocation_id(compiler); - 
- memset(&invocation, 0, sizeof(invocation)); - vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_INT, 0); - invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; - - vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2); - input_reg.idx[0].offset = 0; - input_reg.idx[0].rel_addr = &invocation; - input_reg.idx[1].offset = 0; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *output = &output_signature->elements[i]; - const struct signature_element *input = &input_signature->elements[i]; - struct vkd3d_shader_register_info output_reg_info; - struct vkd3d_shader_register output_reg; - - VKD3D_ASSERT(input->mask == output->mask); - VKD3D_ASSERT(input->component_type == output->component_type); - - input_reg.idx[1].offset = i; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); - output_reg.idx[0].offset = i; - spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info); - - component_type = output->component_type; - component_count = vsir_write_mask_component_count(output->mask); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - - dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id); - - vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); - } - - vkd3d_spirv_build_op_return(builder); -} - static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) { @@ -7501,7 +7655,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, 
general_implementation: write_mask = dst->write_mask; - if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) + if (data_type_is_64_bit(src->reg.data_type) && !data_type_is_64_bit(dst->reg.data_type)) write_mask = vsir_write_mask_64_from_32(write_mask); else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) write_mask = vsir_write_mask_32_from_64(write_mask); @@ -10214,13 +10368,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_TGSM_STRUCTURED: spirv_compiler_emit_dcl_tgsm_structured(compiler, instruction); break; - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_INPUT: - spirv_compiler_emit_dcl_input(compiler, instruction); - break; - case VKD3DSIH_DCL_OUTPUT: - spirv_compiler_emit_dcl_output(compiler, instruction); - break; case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; @@ -10239,9 +10386,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: spirv_compiler_emit_output_vertex_count(compiler, instruction); break; - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); - break; case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: spirv_compiler_emit_tessellator_output_primitive(compiler, instruction->declaration.tessellator_output_primitive); @@ -10561,11 +10705,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, break; case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_DCL_RESOURCE_RAW: case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_RAW: @@ -10586,6 +10725,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, 
static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) { + struct vkd3d_shader_dst_param dst; + for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i); @@ -10609,19 +10750,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) if (compiler->program->has_point_size) { - struct vkd3d_shader_dst_param dst; - vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; - spirv_compiler_emit_output_register(compiler, &dst); + spirv_compiler_emit_io_register(compiler, &dst); } if (compiler->program->has_point_coord) { - struct vkd3d_shader_dst_param dst; - vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); - spirv_compiler_emit_input_register(compiler, &dst); + spirv_compiler_emit_io_register(compiler, &dst); + } + + for (unsigned int i = 0; i < sizeof(compiler->program->io_dcls) * CHAR_BIT; ++i) + { + /* For hull shaders we internally generate references to OUTPOINTID, + * so that must always be enabled. 
*/ + if (bitmap_is_set(compiler->program->io_dcls, i) + || (compiler->program->shader_version.type == VKD3D_SHADER_TYPE_HULL && i == VKD3DSPR_OUTPOINTID)) + { + vsir_dst_param_init(&dst, i, VKD3D_DATA_FLOAT, 0); + spirv_compiler_emit_io_register(compiler, &dst); + } } } @@ -10677,7 +10826,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compile_info, compiler->message_context)) < 0) return result; - VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) @@ -10743,6 +10892,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count; + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) + spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) spirv_compiler_emit_shader_signature_outputs(compiler); @@ -10823,7 +10975,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) { struct vkd3d_shader_code text; - if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) + if (vkd3d_spirv_binary_to_text(spirv, compile_info->source_name, environment, + compiler->formatting, &text, compiler->message_context) != VKD3D_OK) return VKD3D_ERROR; vkd3d_shader_free_shader_code(spirv); *spirv = text; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 9c41e2c2053..872603052ac 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -166,21 +166,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); /* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ #define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 -#define VKD3D_SM4_REQUIRES_DOUBLES 0x00000001 -#define VKD3D_SM4_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002 -#define VKD3D_SM4_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004 -#define VKD3D_SM4_REQUIRES_64_UAVS 0x00000008 -#define VKD3D_SM4_REQUIRES_MINIMUM_PRECISION 0x00000010 -#define VKD3D_SM4_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020 -#define VKD3D_SM4_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040 -#define VKD3D_SM4_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080 -#define VKD3D_SM4_REQUIRES_TILED_RESOURCES 0x00000100 -#define VKD3D_SM4_REQUIRES_STENCIL_REF 0x00000200 -#define VKD3D_SM4_REQUIRES_INNER_COVERAGE 0x00000400 -#define VKD3D_SM4_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800 -#define VKD3D_SM4_REQUIRES_ROVS 0x00001000 -#define VKD3D_SM4_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000 - enum vkd3d_sm4_opcode { VKD3D_SM4_OP_ADD = 0x00, @@ -707,6 +692,7 @@ struct vkd3d_sm4_opcode_info char src_info[SM4_MAX_SRC_COUNT]; void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); + bool is_conditional_op; }; static const enum vkd3d_primitive_type output_primitive_type_table[] = @@ -1268,6 +1254,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi { ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_domain = ins->declaration.tessellator_domain; } static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1275,6 +1262,7 @@ static void 
shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins { ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_partitioning = ins->declaration.tessellator_partitioning; } static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1282,6 +1270,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader { ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) >> VKD3D_SM5_TESSELLATOR_SHIFT; + priv->p.program->tess_output_primitive = ins->declaration.tessellator_output_primitive; } static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1407,8 +1396,6 @@ struct sm4_stat struct tpf_compiler { - /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */ - struct hlsl_ctx *ctx; struct vsir_program *program; struct vkd3d_sm4_lookup_tables lookup; struct sm4_stat *stat; @@ -1439,18 +1426,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", shader_sm4_read_case_condition}, {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, 
{VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, @@ -1468,7 +1455,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, @@ -1502,7 +1489,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, + shader_sm4_read_conditional_op, true}, {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, @@ -1967,16 +1954,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t return lookup->register_type_info_from_vkd3d[vkd3d_type]; } -static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) -{ - const struct vkd3d_sm4_register_type_info *register_type_info = - get_info_from_vkd3d_register_type(lookup, vkd3d_type); - - VKD3D_ASSERT(register_type_info); - return register_type_info->default_src_swizzle_type; -} - static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) { @@ -2816,7 +2793,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. 
*/ if (!vsir_program_init(program, compile_info, - &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; @@ -2925,6 +2902,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con program->input_signature = dxbc_desc.input_signature; program->output_signature = dxbc_desc.output_signature; program->patch_constant_signature = dxbc_desc.patch_constant_signature; + program->features = dxbc_desc.features; memset(&dxbc_desc, 0, sizeof(dxbc_desc)); /* DXBC stores used masks inverted for output signatures, for some reason. @@ -2993,8 +2971,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; } -static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { @@ -3217,18 +3193,17 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s return true; } -static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +static void add_section(struct tpf_compiler *tpf, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) { /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN * sections to be aligned. Without this, the sections themselves will be * aligned, but their reported sizes won't. 
*/ size_t size = bytecode_align(buffer); - dxbc_writer_add_section(dxbc, tag, buffer->data, size); + dxbc_writer_add_section(&tpf->dxbc, tag, buffer->data, size); if (buffer->status < 0) - ctx->result = buffer->status; + tpf->result = buffer->status; } static int signature_element_pointer_compare(const void *x, const void *y) @@ -3289,7 +3264,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); } - add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); + add_section(tpf, tag, &buffer); vkd3d_free(sorted_elements); } @@ -3331,6 +3306,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) case HLSL_CLASS_HULL_SHADER: case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STREAM_OUTPUT: case HLSL_CLASS_NULL: break; } @@ -3507,28 +3483,6 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ } } -struct extern_resource -{ - /* var is only not NULL if this resource is a whole variable, so it may be responsible for more - * than one component. */ - const struct hlsl_ir_var *var; - const struct hlsl_buffer *buffer; - - char *name; - bool is_user_packed; - - /* The data type of a single component of the resource. - * This might be different from the data type of the resource itself in 4.0 - * profiles, where an array (or multi-dimensional array) is handled as a - * single resource, unlike in 5.0. 
*/ - struct hlsl_type *component_type; - - enum hlsl_regset regset; - unsigned int id, space, index, bind_count; - - struct vkd3d_shader_location loc; -}; - static int sm4_compare_extern_resources(const void *a, const void *b) { const struct extern_resource *aa = (const struct extern_resource *)a; @@ -3544,7 +3498,7 @@ static int sm4_compare_extern_resources(const void *a, const void *b) return vkd3d_u32_compare(aa->index, bb->index); } -static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) { unsigned int i; @@ -3560,7 +3514,7 @@ static const char *string_skip_tag(const char *string) return string; } -static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; struct extern_resource *extern_resources = NULL; @@ -3770,7 +3724,7 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl vkd3d_unreachable(); } -static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) { uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); size_t cbuffers_offset, resources_offset, creator_offset, string_offset; @@ -4000,36 +3954,41 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); set_u32(&buffer, creator_position, creator_offset); - add_section(ctx, dxbc, TAG_RDEF, &buffer); - sm4_free_extern_resources(extern_resources, extern_resources_count); + + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + rdef->code = buffer.data; + rdef->size = buffer.size; } -static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type) { - switch (type->sampler_dim) + switch (resource_type) { - case HLSL_SAMPLER_DIM_1D: + case VKD3D_SHADER_RESOURCE_TEXTURE_1D: return VKD3D_SM4_RESOURCE_TEXTURE_1D; - case HLSL_SAMPLER_DIM_2D: + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: return VKD3D_SM4_RESOURCE_TEXTURE_2D; - case HLSL_SAMPLER_DIM_3D: + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: return VKD3D_SM4_RESOURCE_TEXTURE_3D; - case HLSL_SAMPLER_DIM_CUBE: + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; - case HLSL_SAMPLER_DIM_1DARRAY: + case VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY: return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: + case VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY: return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: + case VKD3D_SHADER_RESOURCE_TEXTURE_2DMS: return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: + case VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY: return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY: return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: - case HLSL_SAMPLER_DIM_RAW_BUFFER: - case 
HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + case VKD3D_SHADER_RESOURCE_BUFFER: return VKD3D_SM4_RESOURCE_BUFFER; default: vkd3d_unreachable(); @@ -4096,297 +4055,6 @@ struct sm4_instruction unsigned int idx_src_count; }; -static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, - const struct hlsl_ir_node *instr) -{ - VKD3D_ASSERT(instr->reg.allocated); - reg->type = VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; -} - -static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, - enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, - struct sm4_instruction *sm4_instr) -{ - const struct hlsl_ir_var *var = deref->var; - unsigned int offset_const_deref; - - reg->type = type; - reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - reg->dimension = VSIR_DIMENSION_VEC4; - - VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - - if (!var->indexable) - { - offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); - reg->idx[0].offset += offset_const_deref / 4; - reg->idx_count = 1; - } - else - { - offset_const_deref = deref->const_offset; - reg->idx[1].offset = offset_const_deref / 4; - reg->idx_count = 2; - - if (deref->rel_offset.node) - { - struct vkd3d_shader_src_param *idx_src; - unsigned int idx_writemask; - - VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); - idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; - memset(idx_src, 0, sizeof(*idx_src)); - - reg->idx[1].rel_addr = idx_src; - sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); - VKD3D_ASSERT(idx_writemask != 0); - idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); - } - } - - *writemask = 0xf & (0xf << (offset_const_deref % 4)); - if (var->regs[HLSL_REGSET_NUMERIC].writemask) - *writemask = 
hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); -} - -static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); - const struct hlsl_ir_var *var = deref->var; - struct hlsl_ctx *ctx = tpf->ctx; - - if (var->is_uniform) - { - enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); - - if (regset == HLSL_REGSET_TEXTURES) - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = VSIR_DIMENSION_VEC4; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ - reg->idx_count = 2; - } - else - { - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } - VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ - reg->idx_count = 2; - } - else - { - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } - VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; - 
reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ - reg->idx_count = 2; - } - else - { - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } - VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - - VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->buffer->reg.id; - reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ - reg->idx[2].offset = offset / 4; - reg->idx_count = 3; - } - else - { - reg->idx[0].offset = var->buffer->reg.index; - reg->idx[1].offset = offset / 4; - reg->idx_count = 2; - } - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } - } - else if (var->is_input_semantic) - { - bool has_idx; - - if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - if (shader_sm4_is_scalar_register(reg)) - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - VKD3D_ASSERT(hlsl_reg.allocated); - - if (version->type == VKD3D_SHADER_TYPE_DOMAIN) - reg->type = VKD3DSPR_PATCHCONST; - else - reg->type = VKD3DSPR_INPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else if (var->is_output_semantic) - { - bool has_idx; - - if 
(sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - if (shader_sm4_is_scalar_register(reg)) - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - VKD3D_ASSERT(hlsl_reg.allocated); - reg->type = VKD3DSPR_OUTPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else - { - enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; - - sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); - } -} - -static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) -{ - unsigned int hlsl_swizzle; - uint32_t writemask; - - sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); - if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) - { - hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - src->swizzle = swizzle_from_sm4(hlsl_swizzle); - } -} - -static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) -{ - sm4_register_from_node(&dst->reg, &dst->write_mask, instr); -} - -static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, - const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) -{ - src->swizzle = 0; - src->reg.type = VKD3DSPR_IMMCONST; - if (width == 1) - { - src->reg.dimension = VSIR_DIMENSION_SCALAR; - 
src->reg.u.immconst_u32[0] = value->u[0].u; - } - else - { - unsigned int i, j = 0; - - src->reg.dimension = VSIR_DIMENSION_VEC4; - for (i = 0; i < 4; ++i) - { - if ((map_writemask & (1u << i)) && (j < width)) - src->reg.u.immconst_u32[i] = value->u[j++].u; - else - src->reg.u.immconst_u32[i] = 0; - } - } -} - -static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_ir_node *instr, uint32_t map_writemask) -{ - unsigned int hlsl_swizzle; - uint32_t writemask; - - if (instr->type == HLSL_IR_CONSTANT) - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - - sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); - return; - } - - sm4_register_from_node(&src->reg, &writemask, instr); - if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) - { - hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - src->swizzle = swizzle_from_sm4(hlsl_swizzle); - } -} - static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, unsigned int i) { @@ -4650,204 +4318,41 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s sm4_update_stat_counters(tpf, instr); } -static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - const struct hlsl_ir_node *texel_offset) +static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { - struct sm4_instruction_modifier modif; - struct hlsl_ir_constant *offset; - - if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) - return false; - offset = hlsl_ir_constant(texel_offset); - - modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; - modif.u.aoffimmi.u = offset->value.u[0].i; - modif.u.aoffimmi.v = 0; - modif.u.aoffimmi.w = 0; - if (offset->node.data_type->dimx > 1) - modif.u.aoffimmi.v = offset->value.u[1].i; - if 
(offset->node.data_type->dimx > 2) - modif.u.aoffimmi.w = offset->value.u[2].i; - if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 - || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 - || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) - return false; - - instr->modifiers[instr->modifier_count++] = modif; - return true; -} - -static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) -{ - size_t size = (cbuffer->used_size + 3) / 4; + const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb; + size_t size = (cb->size + 3) / 4; struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, - .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, + .srcs[0] = cb->src, .src_count = 1, }; - if (hlsl_version_ge(tpf->ctx, 5, 1)) + if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) { - instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; - instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; - instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ + instr.srcs[0].reg.idx[0].offset = cb->src.reg.idx[0].offset; + instr.srcs[0].reg.idx[1].offset = cb->range.first; + instr.srcs[0].reg.idx[2].offset = cb->range.last; instr.srcs[0].reg.idx_count = 3; instr.idx[0] = size; - instr.idx[1] = cbuffer->reg.space; + instr.idx[1] = cb->range.space; instr.idx_count = 2; } else { - instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; + instr.srcs[0].reg.idx[0].offset = cb->range.first; instr.srcs[0].reg.idx[1].offset = size; instr.srcs[0].reg.idx_count = 2; } - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) -{ - unsigned int i; - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3DSPR_SAMPLER, - .dst_count = 1, - }; - - 
VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); - - if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) - instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; - - for (i = 0; i < resource->bind_count; ++i) - { - if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - - if (hlsl_version_ge(tpf->ctx, 5, 1)) - { - VKD3D_ASSERT(!i); - instr.dsts[0].reg.idx[0].offset = resource->id; - instr.dsts[0].reg.idx[1].offset = resource->index; - instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ - instr.dsts[0].reg.idx_count = 3; - - instr.idx[0] = resource->space; - instr.idx_count = 1; - } - else - { - instr.dsts[0].reg.idx[0].offset = resource->index + i; - instr.dsts[0].reg.idx_count = 1; - } - write_sm4_instruction(tpf, &instr); - } -} - -static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, - bool uav) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - struct hlsl_type *component_type; - struct sm4_instruction instr; - bool multisampled; - unsigned int i; - - VKD3D_ASSERT(resource->regset == regset); - - component_type = resource->component_type; - - for (i = 0; i < resource->bind_count; ++i) - { - if (resource->var && !resource->var->objects_usage[regset][i].used) - continue; - - instr = (struct sm4_instruction) - { - .dsts[0].reg.type = uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, - .dsts[0].reg.idx[0].offset = resource->id + i, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - - .idx[0] = sm4_data_type(component_type) * 0x1111, - .idx_count = 1, - }; - - multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; - - if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) - { - hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Multisampled texture object declaration needs sample count for profile %u.%u.", - version->major, version->minor); - } - - if (vkd3d_shader_ver_ge(version, 5, 1)) - { - VKD3D_ASSERT(!i); - instr.dsts[0].reg.idx[0].offset = resource->id; - instr.dsts[0].reg.idx[1].offset = resource->index; - instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ - instr.dsts[0].reg.idx_count = 3; - - instr.idx[1] = resource->space; - instr.idx_count = 2; - } - else - { - instr.dsts[0].reg.idx[0].offset = resource->index + i; - instr.dsts[0].reg.idx_count = 1; - } - - if (uav) - { - switch (component_type->sampler_dim) - { - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; - instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; - break; - case HLSL_SAMPLER_DIM_RAW_BUFFER: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW; - break; - default: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; - break; - } - - if (component_type->e.resource.rasteriser_ordered) - instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT; - } - else - { - switch (component_type->sampler_dim) - { - case HLSL_SAMPLER_DIM_RAW_BUFFER: - instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; - break; - default: - instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; - break; - } - } - instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); - - if (multisampled) - 
instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + if (ins->flags & VKD3DSI_INDEXED_DYNAMIC) + instr.extra_bits |= VKD3D_SM4_INDEX_TYPE_MASK; - write_sm4_instruction(tpf, &instr); - } + write_sm4_instruction(tpf, &instr); } static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) @@ -4924,42 +4429,116 @@ static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vs write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) +static void tpf_dcl_sampler(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { + const struct vkd3d_shader_sampler *sampler = &ins->declaration.sampler; struct sm4_instruction instr = { - .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, - .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + .extra_bits = ins->flags << VKD3D_SM4_SAMPLER_MODE_SHIFT, + + .dsts[0].reg.type = VKD3DSPR_SAMPLER, + .dst_count = 1, }; - write_sm4_instruction(tpf, &instr); -} + if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) + { + instr.dsts[0].reg.idx[0].offset = sampler->src.reg.idx[0].offset; + instr.dsts[0].reg.idx[1].offset = sampler->range.first; + instr.dsts[0].reg.idx[2].offset = sampler->range.last; + instr.dsts[0].reg.idx_count = 3; -static void tpf_write_hs_decls(const struct tpf_compiler *tpf) -{ - struct sm4_instruction instr = + instr.idx[0] = ins->declaration.sampler.range.space; + instr.idx_count = 1; + } + else { - .opcode = VKD3D_SM5_OP_HS_DECLS, - }; + instr.dsts[0].reg.idx[0].offset = sampler->range.first; + instr.dsts[0].reg.idx_count = 1; + } write_sm4_instruction(tpf, &instr); } -static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) +static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { - struct sm4_instruction instr = - { - .opcode = 
VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, + const struct vkd3d_shader_structured_resource *structured_resource = &ins->declaration.structured_resource; + const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct vkd3d_sm4_opcode_info *info; + struct sm4_instruction instr = {0}; + unsigned int i, k; + bool uav; + + info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); + VKD3D_ASSERT(info); + + uav = ins->opcode == VKD3DSIH_DCL_UAV_TYPED + || ins->opcode == VKD3DSIH_DCL_UAV_RAW + || ins->opcode == VKD3DSIH_DCL_UAV_STRUCTURED; + + instr.opcode = info->opcode; + + instr.dsts[0] = semantic->resource.reg; + instr.dst_count = 1; + + for (k = 0; k < 4; ++k) + { + for (i = ARRAY_SIZE(data_type_table) - 1; i < ARRAY_SIZE(data_type_table); --i) + { + if (semantic->resource_data_type[k] == data_type_table[i]) + { + instr.idx[0] |= i << (4 * k); + break; + } + } + } + instr.idx_count = 1; + + if (vkd3d_shader_ver_ge(version, 5, 1)) + { + instr.dsts[0].reg.idx[0].offset = semantic->resource.reg.reg.idx[0].offset; + instr.dsts[0].reg.idx[1].offset = semantic->resource.range.first; + instr.dsts[0].reg.idx[2].offset = semantic->resource.range.last; + instr.dsts[0].reg.idx_count = 3; + + instr.idx[1] = semantic->resource.range.space; + instr.idx_count = 2; + } + else + { + instr.dsts[0].reg.idx[0].offset = semantic->resource.range.first; + instr.dsts[0].reg.idx_count = 1; + } + + if (uav) + instr.extra_bits |= ins->flags << VKD3D_SM5_UAV_FLAGS_SHIFT; + + instr.extra_bits |= (sm4_resource_dimension(ins->resource_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + instr.extra_bits |= semantic->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + + if (ins->structured) + instr.byte_stride = structured_resource->byte_stride; + + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) +{ + struct 
sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, + .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, }; write_sm4_instruction(tpf, &instr); } -static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) +static void tpf_write_hs_decls(const struct tpf_compiler *tpf) { struct sm4_instruction instr = { - .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, + .opcode = VKD3D_SM5_OP_HS_DECLS, }; write_sm4_instruction(tpf, &instr); @@ -5022,594 +4601,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler write_sm4_instruction(tpf, &instr); } -static void write_sm4_ret(const struct tpf_compiler *tpf) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, - enum hlsl_sampler_dim dim) -{ - const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); - bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; - unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - if (uav) - instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; - else if (raw) - instr.opcode = VKD3D_SM5_OP_LD_RAW; - else - instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - if (!uav) - { - /* Mipmap level is in the last component in the IR, but needs to be in the W - * component in the instruction. */ - unsigned int dim_count = hlsl_sampler_dim_count(dim); - - if (dim_count == 1) - coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; - if (dim_count == 2) - coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; - } - - sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask); - - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); - - instr.src_count = 2; - - if (multisampled) - { - if (sample_index->type == HLSL_IR_CONSTANT) - { - struct vkd3d_shader_register *reg = &instr.srcs[2].reg; - struct hlsl_ir_constant *index; - - index = hlsl_ir_constant(sample_index); - - memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); - reg->type = VKD3DSPR_IMMCONST; - reg->dimension = VSIR_DIMENSION_SCALAR; - reg->u.immconst_u32[0] = index->value.u[0].u; - } - else if (version->major == 4 && version->minor == 0) - { - hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); - } - else - { - sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0); - } - - ++instr.src_count; - } - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_deref *sampler = 
&load->sampler; - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - switch (load->load_type) - { - case HLSL_RESOURCE_SAMPLE: - instr.opcode = VKD3D_SM4_OP_SAMPLE; - break; - - case HLSL_RESOURCE_SAMPLE_CMP: - instr.opcode = VKD3D_SM4_OP_SAMPLE_C; - break; - - case HLSL_RESOURCE_SAMPLE_CMP_LZ: - instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; - break; - - case HLSL_RESOURCE_SAMPLE_LOD: - instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; - break; - - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - instr.opcode = VKD3D_SM4_OP_SAMPLE_B; - break; - - case HLSL_RESOURCE_SAMPLE_GRAD: - instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; - break; - - default: - vkd3d_unreachable(); - } - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); - sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); - instr.src_count = 3; - - if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD - || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) - { - sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); - ++instr.src_count; - } - else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) - { - sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); - instr.src_count += 2; - } - else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP - || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) - { - sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); - 
++instr.src_count; - } - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - - VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; - if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - - if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER - || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); - return; - } - - VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_RESINFO; - if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -static 
void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_IF, - .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - - VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &iff->else_block); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) -{ - struct sm4_instruction instr = {0}; - - switch (jump->type) - { - case HLSL_IR_JUMP_BREAK: - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - - case HLSL_IR_JUMP_CONTINUE: - instr.opcode = VKD3D_SM4_OP_CONTINUE; - break; - - case HLSL_IR_JUMP_DISCARD_NZ: - { - instr.opcode = VKD3D_SM4_OP_DISCARD; - instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.src_count = 1; - sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; - } - - case HLSL_IR_JUMP_RETURN: - vkd3d_unreachable(); - - default: - hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; - } - - write_sm4_instruction(tpf, &instr); -} - -/* Does this variable's data come directly from the API user, rather than being - * temporary or from a previous shader stage? - * I.e. is it a uniform or VS input? 
*/ -static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) -{ - if (var->is_uniform) - return true; - - return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; -} - -static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_type *type = load->node.data_type; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - sm4_dst_from_node(&instr.dsts[0], &load->node); - instr.dst_count = 1; - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) - { - struct hlsl_constant_value value; - - /* Uniform bools can be specified as anything, but internal bools always - * have 0 for false and ~0 for true. Normalize that here. */ - - instr.opcode = VKD3D_SM4_OP_MOVC; - - sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); - - memset(&value, 0xff, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); - memset(&value, 0, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); - instr.src_count = 3; - } - else - { - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); - instr.src_count = 1; - } - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_LOOP, - }; - - write_sm4_instruction(tpf, &instr); - - write_sm4_block(tpf, &loop->body); - - instr.opcode = VKD3D_SM4_OP_ENDLOOP; - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref 
*resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) -{ - const struct vkd3d_shader_version *version = &tpf->program->shader_version; - struct vkd3d_shader_src_param *src; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - instr.opcode = VKD3D_SM4_OP_GATHER4; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - if (!vkd3d_shader_ver_ge(version, 5, 0)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } - instr.opcode = VKD3D_SM5_OP_GATHER4_PO; - sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); - } - } - - sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr); - - src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); - src->reg.dimension = VSIR_DIMENSION_VEC4; - src->swizzle = swizzle; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) -{ - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = load->sample_index.node; - const struct hlsl_ir_node *coords = load->coords.node; - - if (load->sampler.var && !load->sampler.var->is_uniform) - { - hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - - if (!load->resource.var->is_uniform) - { - hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case 
HLSL_RESOURCE_LOAD: - write_sm4_ld(tpf, &load->node, &load->resource, - coords, sample_index, texel_offset, load->sampling_dim); - break; - - case HLSL_RESOURCE_SAMPLE: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_GRAD: - /* Combined sample expressions were lowered. */ - VKD3D_ASSERT(load->sampler.var); - write_sm4_sample(tpf, load); - break; - - case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE_INFO: - write_sm4_sampleinfo(tpf, load); - break; - - case HLSL_RESOURCE_RESINFO: - write_sm4_resinfo(tpf, load); - break; - - case HLSL_RESOURCE_SAMPLE_PROJ: - vkd3d_unreachable(); - } -} - -static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) -{ - struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); - struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; - struct sm4_instruction instr; - - if (!store->resource.var->is_uniform) - { - hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); - return; - } - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not 
implemented."); - return; - } - - memset(&instr, 0, sizeof(instr)); - - sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr); - instr.dst_count = 1; - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) - { - instr.opcode = VKD3D_SM5_OP_STORE_RAW; - instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx); - } - else - { - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - } - - sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) -{ - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; - uint32_t writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); - instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) -{ - const struct hlsl_ir_node *selector = s->selector.node; - struct hlsl_ir_switch_case *c; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SWITCH; - - sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); - - LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) - { - memset(&instr, 0, sizeof(instr)); - if (c->is_default) - { - instr.opcode = VKD3D_SM4_OP_DEFAULT; - } - else - { - struct hlsl_constant_value value = { .u[0].u = c->value }; - - instr.opcode = 
VKD3D_SM4_OP_CASE; - sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 1; - } - - write_sm4_instruction(tpf, &instr); - write_sm4_block(tpf, &c->body); - } - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_ENDSWITCH; - - write_sm4_instruction(tpf, &instr); -} - -static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) -{ - unsigned int hlsl_swizzle; - struct sm4_instruction instr; - uint32_t writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &swizzle->node); - instr.dst_count = 1; - - sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); - hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); - instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); - instr.src_count = 1; - - write_sm4_instruction(tpf, &instr); -} - static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { + struct sm4_instruction_modifier *modifier; const struct vkd3d_sm4_opcode_info *info; struct sm4_instruction instr = {0}; unsigned int dst_count, src_count; @@ -5655,6 +4649,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ for (unsigned int i = 0; i < ins->src_count; ++i) instr.srcs[i] = ins->src[i]; + if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w) + { + VKD3D_ASSERT(instr.modifier_count < ARRAY_SIZE(instr.modifiers)); + modifier = &instr.modifiers[instr.modifier_count++]; + modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI; + modifier->u.aoffimmi.u = ins->texel_offset.u; + modifier->u.aoffimmi.v = ins->texel_offset.v; + modifier->u.aoffimmi.w = ins->texel_offset.w; + } + + if (info->is_conditional_op) + { + if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ) + instr.extra_bits 
= VKD3D_SM4_CONDITIONAL_NZ; + } + write_sm4_instruction(tpf, &instr); } @@ -5662,6 +4672,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ { switch (ins->opcode) { + case VKD3DSIH_DCL_CONSTANT_BUFFER: + tpf_dcl_constant_buffer(tpf, ins); + break; + case VKD3DSIH_DCL_TEMPS: tpf_dcl_temps(tpf, ins->declaration.count); break; @@ -5702,8 +4716,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); break; + case VKD3DSIH_DCL_SAMPLER: + tpf_dcl_sampler(tpf, ins); + break; + + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_UAV_RAW: + case VKD3DSIH_DCL_UAV_STRUCTURED: + case VKD3DSIH_DCL_UAV_TYPED: + tpf_dcl_texture(tpf, ins); + break; + case VKD3DSIH_ADD: case VKD3DSIH_AND: + case VKD3DSIH_BREAK: + case VKD3DSIH_CASE: + case VKD3DSIH_CONTINUE: + case VKD3DSIH_DEFAULT: + case VKD3DSIH_DISCARD: case VKD3DSIH_DIV: case VKD3DSIH_DP2: case VKD3DSIH_DP3: @@ -5714,6 +4745,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_DSY: case VKD3DSIH_DSY_COARSE: case VKD3DSIH_DSY_FINE: + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: + case VKD3DSIH_ENDSWITCH: case VKD3DSIH_EQO: case VKD3DSIH_EXP: case VKD3DSIH_F16TOF32: @@ -5721,9 +4756,14 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_FRC: case VKD3DSIH_FTOI: case VKD3DSIH_FTOU: + case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_PO: case VKD3DSIH_GEO: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_IADD: case VKD3DSIH_IEQ: + case VKD3DSIH_IF: case VKD3DSIH_IGE: case VKD3DSIH_ILT: case VKD3DSIH_IMAD: @@ -5735,7 +4775,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_ISHL: case VKD3DSIH_ISHR: case VKD3DSIH_ITOF: + case VKD3DSIH_LD: + case 
VKD3DSIH_LD2DMS: + case VKD3DSIH_LD_RAW: + case VKD3DSIH_LD_UAV_TYPED: case VKD3DSIH_LOG: + case VKD3DSIH_LOOP: case VKD3DSIH_LTO: case VKD3DSIH_MAD: case VKD3DSIH_MAX: @@ -5747,14 +4792,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_NOT: case VKD3DSIH_OR: case VKD3DSIH_RCP: + case VKD3DSIH_RESINFO: + case VKD3DSIH_RET: case VKD3DSIH_ROUND_NE: case VKD3DSIH_ROUND_NI: case VKD3DSIH_ROUND_PI: case VKD3DSIH_ROUND_Z: case VKD3DSIH_RSQ: + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_GRAD: case VKD3DSIH_SAMPLE_INFO: + case VKD3DSIH_SAMPLE_LOD: case VKD3DSIH_SINCOS: case VKD3DSIH_SQRT: + case VKD3DSIH_STORE_RAW: + case VKD3DSIH_STORE_UAV_TYPED: + case VKD3DSIH_SWITCH: case VKD3DSIH_UDIV: case VKD3DSIH_UGE: case VKD3DSIH_ULT: @@ -5772,102 +4828,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ } } -static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) +static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program) { - const struct hlsl_ir_node *instr; - unsigned int vsir_instr_idx; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { - hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", - instr->data_type->class); - break; - } - - if (!instr->reg.allocated) - { - VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); - continue; - } - } - - switch (instr->type) - { - case HLSL_IR_CALL: - case HLSL_IR_CONSTANT: - vkd3d_unreachable(); - - case HLSL_IR_IF: - write_sm4_if(tpf, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: - write_sm4_jump(tpf, hlsl_ir_jump(instr)); - break; - - case HLSL_IR_LOAD: - write_sm4_load(tpf, hlsl_ir_load(instr)); - break; - - case 
HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: - write_sm4_loop(tpf, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: - write_sm4_store(tpf, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWITCH: - write_sm4_switch(tpf, hlsl_ir_switch(instr)); - break; - - case HLSL_IR_SWIZZLE: - write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); - break; - - case HLSL_IR_VSIR_INSTRUCTION_REF: - vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; - tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); - break; - - default: - hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} + unsigned int i; -static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) -{ if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); - write_sm4_block(tpf, &func->body); - - write_sm4_ret(tpf); + for (i = 0; i < program->instructions.count; ++i) + tpf_handle_instruction(tpf, &program->instructions.elements[i]); } -static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) +static void tpf_write_shdr(struct tpf_compiler *tpf) { - const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct vsir_program *program = tpf->program; + const struct vkd3d_shader_version *version; struct vkd3d_bytecode_buffer buffer = {0}; - struct extern_resource *extern_resources; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; - struct hlsl_ctx *ctx = tpf->ctx; size_t token_count_position; - uint32_t global_flags = 0; static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -5884,101 +4861,45 @@ static void tpf_write_shdr(struct 
tpf_compiler *tpf, struct hlsl_ir_function_dec tpf->buffer = &buffer; - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - + version = &program->shader_version; put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); token_count_position = put_u32(&buffer, 0); - if (version->major == 4) - { - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - const struct hlsl_type *type = resource->component_type; - - if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) - { - global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; - break; - } - } - } - - if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) - global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; - - if (global_flags) - write_sm4_dcl_global_flags(tpf, global_flags); + if (program->global_flags) + write_sm4_dcl_global_flags(tpf, program->global_flags); if (version->type == VKD3D_SHADER_TYPE_HULL) { tpf_write_hs_decls(tpf); - tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ - tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); - tpf_write_dcl_tessellator_domain(tpf, ctx->domain); - tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); - tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); + tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); + tpf_write_dcl_output_control_point_count(tpf, program->output_control_point_count); + tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); + tpf_write_dcl_tessellator_partitioning(tpf, program->tess_partitioning); + tpf_write_dcl_tessellator_output_primitive(tpf, program->tess_output_primitive); } else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) { - tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ - 
tpf_write_dcl_tessellator_domain(tpf, ctx->domain); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(tpf, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - - if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(tpf, resource, false); - else if (resource->regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(tpf, resource, true); + tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); + tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); } - if (version->type == VKD3D_SHADER_TYPE_HULL) - tpf_write_hs_control_point_phase(tpf); - - tpf_write_shader_function(tpf, entry_func); - - if (version->type == VKD3D_SHADER_TYPE_HULL) - { - tpf_write_hs_fork_phase(tpf); - tpf_write_shader_function(tpf, ctx->patch_constant_func); - } + tpf_write_program(tpf, program); set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); + add_section(tpf, TAG_SHDR, &buffer); tpf->buffer = NULL; - - sm4_free_extern_resources(extern_resources, extern_resources_count); } static void tpf_write_sfi0(struct tpf_compiler *tpf) { - struct extern_resource *extern_resources; - unsigned int extern_resources_count; - struct hlsl_ctx *ctx = tpf->ctx; uint64_t *flags; flags = vkd3d_calloc(1, sizeof(*flags)); - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - for (unsigned int i = 0; i < extern_resources_count; ++i) - { - if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) - *flags |= VKD3D_SM4_REQUIRES_ROVS; - } - sm4_free_extern_resources(extern_resources, extern_resources_count); + if (tpf->program->features.rovs) + 
*flags |= DXBC_SFI0_REQUIRES_ROVS; /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ @@ -5993,7 +4914,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) { struct vkd3d_bytecode_buffer buffer = {0}; const struct sm4_stat *stat = tpf->stat; - struct hlsl_ctx *ctx = tpf->ctx; put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); @@ -6025,7 +4945,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); put_u32(&buffer, 0); /* Sample frequency */ - if (hlsl_version_ge(ctx, 5, 0)) + if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 0)) { put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); @@ -6037,15 +4957,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); } - add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); + add_section(tpf, TAG_STAT, &buffer); +} + +static void tpf_write_section(struct tpf_compiler *tpf, uint32_t tag, const struct vkd3d_shader_code *code) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + + bytecode_put_bytes(&buffer, code->code, code->size); + add_section(tpf, tag, &buffer); } -/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving - * data from the other parameters instead, so they can be removed from the - * arguments and this function can be independent of HLSL structs. 
*/ -int tpf_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { enum vkd3d_shader_type shader_type = program->shader_version.type; struct tpf_compiler tpf = {0}; @@ -6053,7 +4977,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, size_t i; int ret; - tpf.ctx = ctx; tpf.program = program; tpf.buffer = NULL; tpf.stat = &stat; @@ -6064,14 +4987,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); - write_sm4_rdef(ctx, &tpf.dxbc); - tpf_write_shdr(&tpf, entry_func); + tpf_write_section(&tpf, TAG_RDEF, rdef); + tpf_write_shdr(&tpf); tpf_write_sfi0(&tpf); tpf_write_stat(&tpf); ret = VKD3D_OK; - if (ctx->result) - ret = ctx->result; if (tpf.result) ret = tpf.result; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index d751f2dc6bf..86ec8f15fb7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; size_t combined_samplers_size; + + enum vkd3d_shader_tessellator_output_primitive output_primitive; + enum vkd3d_shader_tessellator_partitioning 
partitioning; }; static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, @@ -1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, instruction->declaration.structured_resource.byte_stride, false, instruction->flags); break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + context->output_primitive = instruction->declaration.tessellator_output_primitive; + break; + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: + context->partitioning = instruction->declaration.tessellator_partitioning; + break; case VKD3DSIH_IF: case VKD3DSIH_IFC: cf_info = vkd3d_shader_scan_push_cf_info(context); @@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) { struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; struct vkd3d_shader_scan_descriptor_info *descriptor_info; struct vkd3d_shader_scan_signature_info *signature_info; @@ -1530,6 +1542,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh descriptor_info1 = &local_descriptor_info1; } + tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, descriptor_info1, combined_sampler_info, message_context); @@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh if (!ret && descriptor_info) ret = convert_descriptor_info(descriptor_info, descriptor_info1); + if (!ret && tessellation_info) + { + tessellation_info->output_primitive = context.output_primitive; + tessellation_info->partitioning = 
context.partitioning; + } + if (ret < 0) { if (combined_sampler_info) @@ -1959,7 +1979,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type dxbc_tpf_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -#ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -1974,7 +1994,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type hlsl_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -#ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -1986,7 +2006,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type d3dbc_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -#ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -1996,7 +2016,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( static const enum vkd3d_shader_target_type dxbc_dxil_types[] = { VKD3D_SHADER_TARGET_SPIRV_BINARY, -# ifdef HAVE_SPIRV_TOOLS +#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, # endif VKD3D_SHADER_TARGET_D3D_ASM, diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index be7c0b73a22..3bfb0a7c3cd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -62,6 +62,8 @@ #define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) #define VKD3D_SHADER_MINIMUM_PRECISION_COUNT 
(VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) +#define VKD3D_MAX_STREAM_COUNT 4 + enum vkd3d_shader_error { VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, @@ -163,6 +165,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037, VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038, VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, + VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -247,6 +250,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, + VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE = 9023, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, @@ -1123,6 +1127,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); void shader_signature_cleanup(struct shader_signature *signature); +struct vsir_features +{ + /* The shader requires rasteriser-ordered views. 
*/ + bool rovs; +}; + struct dxbc_shader_desc { const uint32_t *byte_code; @@ -1131,6 +1141,7 @@ struct dxbc_shader_desc struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; + struct vsir_features features; }; struct vkd3d_shader_register_semantic @@ -1400,9 +1411,10 @@ enum vsir_control_flow_type enum vsir_normalisation_level { - VSIR_NOT_NORMALISED, + VSIR_NORMALISED_SM1, + VSIR_NORMALISED_SM4, VSIR_NORMALISED_HULL_CONTROL_POINT_IO, - VSIR_FULLY_NORMALISED_IO, + VSIR_NORMALISED_SM6, }; struct vsir_program @@ -1428,9 +1440,16 @@ struct vsir_program bool use_vocp; bool has_point_size; bool has_point_coord; + bool has_fog; uint8_t diffuse_written_mask; enum vsir_control_flow_type cf_type; enum vsir_normalisation_level normalisation_level; + enum vkd3d_tessellator_domain tess_domain; + enum vkd3d_shader_tessellator_partitioning tess_partitioning; + enum vkd3d_shader_tessellator_output_primitive tess_output_primitive; + uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; + + struct vsir_features features; const char **block_names; size_t block_name_count; @@ -1643,6 +1662,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); +int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + int glsl_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, @@ -1661,6 +1684,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, const struct 
vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + enum vkd3d_md5_variant { VKD3D_MD5_STANDARD, @@ -1942,6 +1968,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define DXBC_MAX_SECTION_COUNT 7 +#define DXBC_SFI0_REQUIRES_DOUBLES 0x00000001u +#define DXBC_SFI0_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002u +#define DXBC_SFI0_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004u +#define DXBC_SFI0_REQUIRES_64_UAVS 0x00000008u +#define DXBC_SFI0_REQUIRES_MINIMUM_PRECISION 0x00000010u +#define DXBC_SFI0_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020u +#define DXBC_SFI0_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040u +#define DXBC_SFI0_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080u +#define DXBC_SFI0_REQUIRES_TILED_RESOURCES 0x00000100u +#define DXBC_SFI0_REQUIRES_STENCIL_REF 0x00000200u +#define DXBC_SFI0_REQUIRES_INNER_COVERAGE 0x00000400u +#define DXBC_SFI0_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800u +#define DXBC_SFI0_REQUIRES_ROVS 0x00001000u +#define DXBC_SFI0_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000u + struct dxbc_writer { unsigned int section_count; diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index a55a97f6f2f..6c7bf167910 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -1255,6 +1255,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore)); } +static void vkd3d_vk_descriptor_pool_array_cleanup(struct vkd3d_vk_descriptor_pool_array *array) +{ + vkd3d_free(array->pools); +} + +static void vkd3d_vk_descriptor_pool_array_init(struct 
vkd3d_vk_descriptor_pool_array *array) +{ + memset(array, 0, sizeof(*array)); +} + +static bool vkd3d_vk_descriptor_pool_array_push_array(struct vkd3d_vk_descriptor_pool_array *array, + const struct vkd3d_vk_descriptor_pool *pools, size_t count) +{ + if (!vkd3d_array_reserve((void **)&array->pools, &array->capacity, array->count + count, sizeof(*array->pools))) + return false; + + memcpy(&array->pools[array->count], pools, count * sizeof(*pools)); + array->count += count; + + return true; +} + +static bool vkd3d_vk_descriptor_pool_array_push(struct vkd3d_vk_descriptor_pool_array *array, + unsigned int descriptor_count, VkDescriptorPool vk_pool) +{ + struct vkd3d_vk_descriptor_pool pool = + { + .descriptor_count = descriptor_count, + .vk_pool = vk_pool, + }; + + return vkd3d_vk_descriptor_pool_array_push_array(array, &pool, 1); +} + +static VkDescriptorPool vkd3d_vk_descriptor_pool_array_find(struct vkd3d_vk_descriptor_pool_array *array, + unsigned int *descriptor_count) +{ + VkDescriptorPool vk_pool; + size_t i; + + for (i = 0; i < array->count; ++i) + { + if (array->pools[i].descriptor_count >= *descriptor_count) + { + *descriptor_count = array->pools[i].descriptor_count; + vk_pool = array->pools[i].vk_pool; + array->pools[i] = array->pools[--array->count]; + + return vk_pool; + } + } + + return VK_NULL_HANDLE; +} + +static void vkd3d_vk_descriptor_pool_array_destroy_pools(struct vkd3d_vk_descriptor_pool_array *array, + const struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + size_t i; + + for (i = 0; i < array->count; ++i) + { + VK_CALL(vkDestroyDescriptorPool(device->vk_device, array->pools[i].vk_pool, NULL)); + } + array->count = 0; +} + /* Command buffers */ static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list, const char *message, ...) 
@@ -1376,18 +1444,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat return true; } -static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator, - VkDescriptorPool pool) -{ - if (!vkd3d_array_reserve((void **)&allocator->descriptor_pools, &allocator->descriptor_pools_size, - allocator->descriptor_pool_count + 1, sizeof(*allocator->descriptor_pools))) - return false; - - allocator->descriptor_pools[allocator->descriptor_pool_count++] = pool; - - return true; -} - static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, struct vkd3d_view *view) { @@ -1426,37 +1482,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all } static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( - struct d3d12_command_allocator *allocator) + struct d3d12_command_allocator *allocator, enum vkd3d_shader_descriptor_type descriptor_type, + unsigned int descriptor_count, bool unbounded) { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct VkDescriptorPoolCreateInfo pool_desc; VkDevice vk_device = device->vk_device; + VkDescriptorPoolSize vk_pool_sizes[2]; + unsigned int pool_size, pool_limit; VkDescriptorPool vk_pool; VkResult vr; - if (allocator->free_descriptor_pool_count > 0) - { - vk_pool = allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1]; - allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1] = VK_NULL_HANDLE; - --allocator->free_descriptor_pool_count; - } - else + if (!(vk_pool = vkd3d_vk_descriptor_pool_array_find(&allocator->free_descriptor_pools[descriptor_type], + &descriptor_count))) { + pool_limit = device->vk_pool_limits[descriptor_type]; + + if (descriptor_count > pool_limit) + { + if (!unbounded) + { + ERR("Descriptor count %u exceeds maximum pool size %u.\n", descriptor_count, pool_limit); + return 
VK_NULL_HANDLE; + } + + WARN("Clamping descriptor count %u to maximum pool size %u for unbounded allocation.\n", + descriptor_count, pool_limit); + descriptor_count = pool_limit; + } + + pool_size = allocator->vk_pool_sizes[descriptor_type]; + if (descriptor_count > pool_size) + { + pool_size = 1u << (vkd3d_log2i(descriptor_count - 1) + 1); + pool_size = min(pool_limit, pool_size); + } + descriptor_count = pool_size; + + vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true); + vk_pool_sizes[0].descriptorCount = descriptor_count; + + vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false); + vk_pool_sizes[1].descriptorCount = descriptor_count; + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; - pool_desc.poolSizeCount = device->vk_pool_count; - pool_desc.pPoolSizes = device->vk_pool_sizes; + pool_desc.poolSizeCount = 1; + if (vk_pool_sizes[1].type != vk_pool_sizes[0].type) + ++pool_desc.poolSizeCount; + pool_desc.pPoolSizes = vk_pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { ERR("Failed to create descriptor pool, vr %d.\n", vr); return VK_NULL_HANDLE; } + + if (!unbounded || descriptor_count < pool_limit) + allocator->vk_pool_sizes[descriptor_type] = min(pool_limit, descriptor_count * 2); } - if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool))) + if (!(vkd3d_vk_descriptor_pool_array_push(&allocator->descriptor_pools[descriptor_type], + descriptor_count, vk_pool))) { ERR("Failed to add descriptor pool.\n"); VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL)); @@ -1466,9 +1556,9 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( return vk_pool; } -static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( - struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout, - 
unsigned int variable_binding_size, bool unbounded) +static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(struct d3d12_command_allocator *allocator, + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int descriptor_count, + VkDescriptorSetLayout vk_set_layout, unsigned int variable_binding_size, bool unbounded) { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -1478,14 +1568,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( VkDescriptorSet vk_descriptor_set; VkResult vr; - if (!allocator->vk_descriptor_pool) - allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); - if (!allocator->vk_descriptor_pool) + if (!allocator->vk_descriptor_pools[descriptor_type]) + allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, + descriptor_type, descriptor_count, unbounded); + if (!allocator->vk_descriptor_pools[descriptor_type]) return VK_NULL_HANDLE; set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; set_desc.pNext = NULL; - set_desc.descriptorPool = allocator->vk_descriptor_pool; + set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; set_desc.descriptorSetCount = 1; set_desc.pSetLayouts = &vk_set_layout; if (unbounded) @@ -1499,16 +1590,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0) return vk_descriptor_set; - allocator->vk_descriptor_pool = VK_NULL_HANDLE; + allocator->vk_descriptor_pools[descriptor_type] = VK_NULL_HANDLE; if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR) - allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); - if (!allocator->vk_descriptor_pool) + allocator->vk_descriptor_pools[descriptor_type] = 
d3d12_command_allocator_allocate_descriptor_pool(allocator, + descriptor_type, descriptor_count, unbounded); + if (!allocator->vk_descriptor_pools[descriptor_type]) { ERR("Failed to allocate descriptor set, vr %d.\n", vr); return VK_NULL_HANDLE; } - set_desc.descriptorPool = allocator->vk_descriptor_pool; + set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0) { FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr); @@ -1534,38 +1626,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); } +static void d3d12_command_allocator_reset_descriptor_pool_array(struct d3d12_command_allocator *allocator, + enum vkd3d_shader_descriptor_type type) +{ + struct vkd3d_vk_descriptor_pool_array *array = &allocator->descriptor_pools[type]; + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs; + const struct vkd3d_vk_descriptor_pool *pool; + size_t i; + + vk_procs = &device->vk_procs; + for (i = 0; i < array->count; ++i) + { + pool = &array->pools[i]; + if (pool->descriptor_count < allocator->vk_pool_sizes[type] + || !vkd3d_vk_descriptor_pool_array_push_array(&allocator->free_descriptor_pools[type], pool, 1)) + VK_CALL(vkDestroyDescriptorPool(device->vk_device, pool->vk_pool, NULL)); + else + VK_CALL(vkResetDescriptorPool(device->vk_device, pool->vk_pool, 0)); + } + array->count = 0; +} + static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator, bool keep_reusable_resources) { struct d3d12_device *device = allocator->device; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, j; + unsigned int i; - allocator->vk_descriptor_pool = VK_NULL_HANDLE; + memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools)); if 
(keep_reusable_resources) { - if (vkd3d_array_reserve((void **)&allocator->free_descriptor_pools, - &allocator->free_descriptor_pools_size, - allocator->free_descriptor_pool_count + allocator->descriptor_pool_count, - sizeof(*allocator->free_descriptor_pools))) + for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) { - for (i = 0, j = allocator->free_descriptor_pool_count; i < allocator->descriptor_pool_count; ++i, ++j) - { - VK_CALL(vkResetDescriptorPool(device->vk_device, allocator->descriptor_pools[i], 0)); - allocator->free_descriptor_pools[j] = allocator->descriptor_pools[i]; - } - allocator->free_descriptor_pool_count += allocator->descriptor_pool_count; - allocator->descriptor_pool_count = 0; + d3d12_command_allocator_reset_descriptor_pool_array(allocator, i); } } else { - for (i = 0; i < allocator->free_descriptor_pool_count; ++i) + for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) { - VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->free_descriptor_pools[i], NULL)); + vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->free_descriptor_pools[i], device); } - allocator->free_descriptor_pool_count = 0; } for (i = 0; i < allocator->transfer_buffer_count; ++i) @@ -1586,11 +1690,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato } allocator->view_count = 0; - for (i = 0; i < allocator->descriptor_pool_count; ++i) + for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) { - VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->descriptor_pools[i], NULL)); + vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->descriptor_pools[i], device); } - allocator->descriptor_pool_count = 0; for (i = 0; i < allocator->framebuffer_count; ++i) { @@ -1647,6 +1750,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo { struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); unsigned int refcount = 
vkd3d_atomic_decrement_u32(&allocator->refcount); + size_t i; TRACE("%p decreasing refcount to %u.\n", allocator, refcount); @@ -1664,8 +1768,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo vkd3d_free(allocator->transfer_buffers); vkd3d_free(allocator->buffer_views); vkd3d_free(allocator->views); - vkd3d_free(allocator->descriptor_pools); - vkd3d_free(allocator->free_descriptor_pools); + for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) + { + vkd3d_vk_descriptor_pool_array_cleanup(&allocator->descriptor_pools[i]); + vkd3d_vk_descriptor_pool_array_cleanup(&allocator->free_descriptor_pools[i]); + } vkd3d_free(allocator->framebuffers); vkd3d_free(allocator->passes); @@ -1822,6 +1929,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo struct vkd3d_queue *queue; VkResult vr; HRESULT hr; + size_t i; if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) return hr; @@ -1851,11 +1959,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo return hresult_from_vk_result(vr); } - allocator->vk_descriptor_pool = VK_NULL_HANDLE; + memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools)); - allocator->free_descriptor_pools = NULL; - allocator->free_descriptor_pools_size = 0; - allocator->free_descriptor_pool_count = 0; + for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) + { + vkd3d_vk_descriptor_pool_array_init(&allocator->free_descriptor_pools[i]); + } allocator->passes = NULL; allocator->passes_size = 0; @@ -1865,9 +1974,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo allocator->framebuffers_size = 0; allocator->framebuffer_count = 0; - allocator->descriptor_pools = NULL; - allocator->descriptor_pools_size = 0; - allocator->descriptor_pool_count = 0; + for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) + { + 
vkd3d_vk_descriptor_pool_array_init(&allocator->descriptor_pools[i]); + allocator->vk_pool_sizes[i] = min(VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE, device->vk_pool_limits[i]); + } allocator->views = NULL; allocator->views_size = 0; @@ -2749,7 +2860,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li } vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, - layout->vk_layout, variable_binding_size, unbounded_offset != UINT_MAX); + layout->descriptor_type, layout->descriptor_count + variable_binding_size, layout->vk_layout, + variable_binding_size, unbounded_offset != UINT_MAX); bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set; } @@ -2805,15 +2917,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; } - if (range->descriptor_count == UINT_MAX) - { - vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; - vk_descriptor_write->dstBinding = 0; - } - else - { - vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; - } + vk_descriptor_write->dstSet = vk_descriptor_sets[range->image_set]; + vk_descriptor_write->dstBinding = use_array ? 
range->image_binding : range->image_binding + index; vk_image_info->sampler = VK_NULL_HANDLE; vk_image_info->imageView = u.view->v.u.vk_image_view; @@ -2934,10 +3039,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list } static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write, - const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set, + const struct d3d12_root_parameter *root_parameter, const VkDescriptorSet *vk_descriptor_sets, VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info) { const struct d3d12_root_descriptor *root_descriptor; + VkDescriptorSet vk_descriptor_set; switch (root_parameter->parameter_type) { @@ -2956,6 +3062,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v } root_descriptor = &root_parameter->u.descriptor; + vk_descriptor_set = vk_descriptor_sets ? vk_descriptor_sets[root_descriptor->set] : VK_NULL_HANDLE; vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; @@ -3011,7 +3118,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list } if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], - root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) + root_parameter, bindings->descriptor_sets, vk_buffer_view, vk_buffer_info)) continue; ++descriptor_count; @@ -3039,8 +3146,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma uav_counter_count = state->uav_counters.binding_count; if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes)))) return; - if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set( - list->allocator, state->uav_counters.vk_set_layout, 0, false))) + if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, + 
VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_counter_count, state->uav_counters.vk_set_layout, 0, false))) goto done; for (i = 0; i < uav_counter_count; ++i) @@ -4612,8 +4719,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, if (vk_info->KHR_push_descriptor) { - vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, VK_NULL_HANDLE, NULL, &buffer_info); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, NULL, &buffer_info); VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); } @@ -4621,7 +4727,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, { d3d12_command_list_prepare_descriptors(list, bind_point); vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); + root_parameter, bindings->descriptor_sets, NULL, &buffer_info); VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); @@ -4685,8 +4791,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li if (vk_info->KHR_push_descriptor) { - vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, VK_NULL_HANDLE, &vk_buffer_view, NULL); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, &vk_buffer_view, NULL); VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); } @@ -4694,7 +4799,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li { d3d12_command_list_prepare_descriptors(list, bind_point); vk_write_descriptor_set_from_root_descriptor(&descriptor_write, - root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); + 
root_parameter, bindings->descriptor_sets, &vk_buffer_view, NULL); VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); @@ -5371,8 +5476,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, view->info.texture.vk_view_type, view->format->type, &pipeline); } - if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( - list->allocator, pipeline.vk_set_layout, 0, false))) + if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(list->allocator, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, 1, pipeline.vk_set_layout, 0, false))) { ERR("Failed to allocate descriptor set.\n"); return; diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index e92373a36fa..7b491805a72 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 
3 : 2; } - limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, - properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); - limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, - properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); - limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, - properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); - limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, - properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); - limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, - properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); + limits->uniform_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, + properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->sampled_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSampledImages, + properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->storage_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, + properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->storage_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageImages, + properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->sampler_max_descriptors = 
min(min(properties->maxDescriptorSetUpdateAfterBindSamplers, + properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision), + VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); } @@ -2677,39 +2682,16 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) static void device_init_descriptor_pool_sizes(struct d3d12_device *device) { const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; - VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; + unsigned int *pool_sizes = device->vk_pool_limits; - if (device->use_vk_heaps) - { - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, - VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; - pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); - device->vk_pool_count = 3; - return; - } - - VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_CBV] = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SRV] = min(limits->sampled_image_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount; - pool_sizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - 
pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_UAV] = min(limits->storage_image_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount; - pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, + pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER] = min(limits->sampler_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - device->vk_pool_count = 6; }; static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 2b0f81d3812..32f34479ea1 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -265,25 +265,6 @@ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHA } } -static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, - bool is_buffer) -{ - switch (type) - { - case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: - return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: - return is_buffer ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: - return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: - return VK_DESCRIPTOR_TYPE_SAMPLER; - default: - FIXME("Unhandled descriptor range type type %#x.\n", type); - return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - } -} - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( D3D12_DESCRIPTOR_RANGE_TYPE type) { @@ -717,6 +698,8 @@ struct vk_binding_array VkDescriptorSetLayoutBinding *bindings; size_t capacity, count; + enum vkd3d_shader_descriptor_type descriptor_type; + unsigned int descriptor_set; unsigned int table_index; unsigned int unbounded_offset; VkDescriptorSetLayoutCreateFlags flags; @@ -754,14 +737,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array, return true; } +static void vk_binding_array_make_unbound(struct vk_binding_array *array, + unsigned int offset, unsigned int table_index) +{ + array->unbounded_offset = offset; + array->table_index = table_index; +} + struct vkd3d_descriptor_set_context { struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; + struct vk_binding_array *current_binding_array[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; unsigned int table_index; - unsigned int unbounded_offset; unsigned int descriptor_index; unsigned int uav_counter_index; unsigned int push_constant_index; + + struct vk_binding_array *push_descriptor_set; + bool push_descriptor; }; static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) @@ -786,46 +779,66 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns return true; } -static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( - struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) +static struct vk_binding_array *d3d12_root_signature_append_vk_binding_array( + struct 
d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, + VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) { + struct vk_binding_array *array; + unsigned int set; + if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) return NULL; - return &context->vk_bindings[root_signature->vk_set_count]; + set = root_signature->vk_set_count++; + array = &context->vk_bindings[set]; + array->descriptor_type = descriptor_type; + array->descriptor_set = set; + array->unbounded_offset = UINT_MAX; + array->flags = flags; + + return array; } -static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, - VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) +static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type( + struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, + struct vkd3d_descriptor_set_context *context) { - struct vk_binding_array *array; + struct vk_binding_array *array, **current; - if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) - return; + if (context->push_descriptor) + { + if (!context->push_descriptor_set) + context->push_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature, + descriptor_type, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context); - array->table_index = context->table_index; - array->unbounded_offset = context->unbounded_offset; - array->flags = flags; + return context->push_descriptor_set; + } - ++root_signature->vk_set_count; + current = context->current_binding_array; + if (!(array = current[descriptor_type])) + { + array = d3d12_root_signature_append_vk_binding_array(root_signature, descriptor_type, 0, context); + current[descriptor_type] = array; + } + + return array; } static HRESULT d3d12_root_signature_append_vk_binding(struct 
d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, - unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, - unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, - const VkSampler *immutable_sampler, unsigned int *binding_idx) + struct vk_binding_array *array, enum vkd3d_shader_descriptor_type descriptor_type, + unsigned int register_space, unsigned int register_idx, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, unsigned int descriptor_count, + struct vkd3d_descriptor_set_context *context, const VkSampler *immutable_sampler) { struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; struct vkd3d_shader_resource_binding *mapping; - struct vk_binding_array *array; + VkDescriptorType vk_descriptor_type; unsigned int idx; - if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) - || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], - vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, - stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) + vk_descriptor_type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor); + if (!vk_binding_array_add_binding(array, vk_descriptor_type, descriptor_count, + stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx)) return E_OUTOFMEMORY; mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; @@ -834,7 +847,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur mapping->register_index = register_idx; mapping->shader_visibility = shader_visibility; mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - mapping->binding.set = root_signature->vk_set_count; + mapping->binding.set = array->descriptor_set; mapping->binding.binding = idx; mapping->binding.count = descriptor_count; if (offset) @@ -843,12 +856,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur offset->dynamic_offset_index = ~0u; } - if (context->unbounded_offset != UINT_MAX) - d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - - if (binding_idx) - *binding_idx = idx; - return S_OK; } @@ -911,7 +918,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro } static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, - const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, + struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, unsigned int vk_binding_array_count, unsigned int bindings_per_range, struct vkd3d_descriptor_set_context *context) { @@ -919,34 +926,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; enum vkd3d_shader_descriptor_type descriptor_type = range->type; unsigned int i, register_space = range->register_space; + struct vk_binding_array *array; HRESULT hr; - if (range->descriptor_count == UINT_MAX) - context->unbounded_offset = range->offset; + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) + return E_OUTOFMEMORY; + range->set = array->descriptor_set - root_signature->main_set; + range->binding = array->count; for (i = 0; i < bindings_per_range; ++i) { - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, register_space, 
range->base_register_idx + i, is_buffer, shader_visibility, - vk_binding_array_count, context, NULL, NULL))) + vk_binding_array_count, context, NULL))) return hr; } - if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + if (range->descriptor_count == UINT_MAX) { - context->unbounded_offset = UINT_MAX; - return S_OK; + vk_binding_array_make_unbound(array, range->offset, context->table_index); + context->current_binding_array[descriptor_type] = NULL; } + if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + return S_OK; + + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) + return E_OUTOFMEMORY; + + range->image_set = array->descriptor_set - root_signature->main_set; + range->image_binding = array->count; for (i = 0; i < bindings_per_range; ++i) { - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, register_space, range->base_register_idx + i, false, shader_visibility, - vk_binding_array_count, context, NULL, NULL))) + vk_binding_array_count, context, NULL))) return hr; } - context->unbounded_offset = UINT_MAX; + if (range->descriptor_count == UINT_MAX) + { + vk_binding_array_make_unbound(array, range->offset, context->table_index); + context->current_binding_array[descriptor_type] = NULL; + } return S_OK; } @@ -1199,16 +1221,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo if (use_vk_heaps) { - /* set, binding and vk_binding_count are not used. */ + /* set, binding, image_set, image_binding, and vk_binding_count are not used. 
*/ range->set = 0; range->binding = 0; + range->image_set = 0; + range->image_binding = 0; range->vk_binding_count = 0; d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); continue; } - range->set = root_signature->vk_set_count - root_signature->main_set; - if (root_signature->use_descriptor_arrays) { if (j && range->type != table->ranges[j - 1].type) @@ -1229,6 +1251,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo range->set = base_range->set; range->binding = base_range->binding; + range->image_set = base_range->image_set; + range->image_binding = base_range->image_binding; range->vk_binding_count = base_range->vk_binding_count - rel_offset; d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range, rel_offset, shader_visibility, context); @@ -1251,8 +1275,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo bindings_per_range = range->descriptor_count; } - range->binding = context->vk_bindings[root_signature->vk_set_count].count; - if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) return hr; @@ -1266,7 +1288,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { - unsigned int binding, i; + enum vkd3d_shader_descriptor_type descriptor_type; + struct vk_binding_array *array; + unsigned int i; HRESULT hr; root_signature->push_descriptor_mask = 0; @@ -1281,14 +1305,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign root_signature->push_descriptor_mask |= 1u << i; - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, - 
vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), + descriptor_type = vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType); + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) + return E_OUTOFMEMORY; + + root_signature->parameters[i].parameter_type = p->ParameterType; + root_signature->parameters[i].u.descriptor.set = array->descriptor_set; + root_signature->parameters[i].u.descriptor.binding = array->count; + + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, - vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) + vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL))) return hr; - root_signature->parameters[i].parameter_type = p->ParameterType; - root_signature->parameters[i].u.descriptor.binding = binding; } return S_OK; @@ -1298,10 +1327,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) { + struct vk_binding_array *array; unsigned int i; HRESULT hr; VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); + + if (!desc->NumStaticSamplers) + return S_OK; + + if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, context))) + return E_OUTOFMEMORY; + for (i = 0; i < desc->NumStaticSamplers; ++i) { const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; @@ -1309,16 +1347,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) return hr; - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, + if (FAILED(hr = 
d3d12_root_signature_append_vk_binding(root_signature, array, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, - &root_signature->static_samplers[i], NULL))) + &root_signature->static_samplers[i]))) return hr; } - if (device->use_vk_heaps) - d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - return S_OK; } @@ -1450,29 +1485,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, return S_OK; } +static HRESULT d3d12_descriptor_set_layout_init(struct d3d12_descriptor_set_layout *layout, + struct d3d12_device *device, const struct vk_binding_array *array) +{ + unsigned int descriptor_count; + bool unbounded; + HRESULT hr; + size_t i; + + descriptor_count = array->unbounded_offset; + if (!(unbounded = descriptor_count != UINT_MAX)) + { + for (i = 0, descriptor_count = 0; i < array->count; ++i) + { + descriptor_count += array->bindings[i].descriptorCount; + } + } + + if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, array->flags, + array->count, unbounded, array->bindings, &layout->vk_layout))) + return hr; + layout->descriptor_type = array->descriptor_type; + layout->descriptor_count = descriptor_count; + layout->unbounded_offset = array->unbounded_offset; + layout->table_index = array->table_index; + + return S_OK; +} + static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) { unsigned int i; HRESULT hr; - d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) return E_INVALIDARG; for (i = 0; i < root_signature->vk_set_count; ++i) { - struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; - struct vk_binding_array *array = &context->vk_bindings[i]; + const struct 
vk_binding_array *array = &context->vk_bindings[i]; VKD3D_ASSERT(array->count); - if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, - array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) + if (FAILED(hr = d3d12_descriptor_set_layout_init(&root_signature->descriptor_set_layouts[i], + root_signature->device, array))) return hr; - layout->unbounded_offset = array->unbounded_offset; - layout->table_index = array->table_index; } return S_OK; @@ -1518,7 +1576,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa HRESULT hr; memset(&context, 0, sizeof(context)); - context.unbounded_offset = UINT_MAX; root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; root_signature->refcount = 1; @@ -1580,17 +1637,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa sizeof(*root_signature->static_samplers)))) goto fail; + context.push_descriptor = vk_info->KHR_push_descriptor; if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) goto fail; - - /* We use KHR_push_descriptor for root descriptor parameters. 
*/ - if (vk_info->KHR_push_descriptor) - { - d3d12_root_signature_append_vk_binding_array(root_signature, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); - } - - root_signature->main_set = root_signature->vk_set_count; + root_signature->main_set = !!context.push_descriptor_set; + context.push_descriptor = false; if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, root_signature->push_constant_ranges, &root_signature->push_constant_range_count))) diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 97a99782d6a..8488d5db3fa 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -58,12 +58,17 @@ #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u #define VKD3D_MAX_DESCRIPTOR_SETS 64u +/* Direct3D 12 binding tier 3 has a limit of "1,000,000+" CBVs, SRVs and UAVs. + * I am not sure what the "+" is supposed to mean: it probably hints that + * implementations may have an even higher limit, but that's pretty obvious, + * that table is for guaranteed minimum limits. */ +#define VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS 1000000u /* D3D12 binding tier 3 has a limit of 2048 samplers. */ #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u -/* The main limitation here is the simple descriptor pool recycling scheme - * requiring each pool to contain all descriptor types used by vkd3d. Limit - * this number to prevent excessive pool memory use. 
*/ #define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u) +#define VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE 1024u + +#define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) extern uint64_t object_global_serial_id; @@ -770,6 +775,25 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc); +static inline VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, + bool is_buffer) +{ + switch (type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + return VK_DESCRIPTOR_TYPE_SAMPLER; + default: + FIXME("Unhandled descriptor range type type %#x.\n", type); + return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } +} + enum vkd3d_vk_descriptor_set_index { VKD3D_SET_INDEX_SAMPLER, @@ -899,6 +923,8 @@ struct d3d12_root_descriptor_table_range unsigned int vk_binding_count; uint32_t set; uint32_t binding; + uint32_t image_set; + uint32_t image_binding; enum vkd3d_shader_descriptor_type type; uint32_t descriptor_magic; @@ -920,6 +946,7 @@ struct d3d12_root_constant struct d3d12_root_descriptor { + uint32_t set; uint32_t binding; }; @@ -936,7 +963,9 @@ struct d3d12_root_parameter struct d3d12_descriptor_set_layout { + enum vkd3d_shader_descriptor_type descriptor_type; VkDescriptorSetLayout vk_layout; + unsigned int descriptor_count; unsigned int unbounded_offset; unsigned int table_index; }; @@ -1135,6 +1164,18 @@ struct vkd3d_buffer VkDeviceMemory vk_memory; }; 
+struct vkd3d_vk_descriptor_pool +{ + unsigned int descriptor_count; + VkDescriptorPool vk_pool; +}; + +struct vkd3d_vk_descriptor_pool_array +{ + struct vkd3d_vk_descriptor_pool *pools; + size_t capacity, count; +}; + /* ID3D12CommandAllocator */ struct d3d12_command_allocator { @@ -1146,11 +1187,9 @@ struct d3d12_command_allocator VkCommandPool vk_command_pool; - VkDescriptorPool vk_descriptor_pool; + VkDescriptorPool vk_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; - VkDescriptorPool *free_descriptor_pools; - size_t free_descriptor_pools_size; - size_t free_descriptor_pool_count; + struct vkd3d_vk_descriptor_pool_array free_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; VkRenderPass *passes; size_t passes_size; @@ -1160,9 +1199,8 @@ struct d3d12_command_allocator size_t framebuffers_size; size_t framebuffer_count; - VkDescriptorPool *descriptor_pools; - size_t descriptor_pools_size; - size_t descriptor_pool_count; + struct vkd3d_vk_descriptor_pool_array descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + unsigned int vk_pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; struct vkd3d_view **views; size_t views_size; @@ -1516,8 +1554,6 @@ struct vkd3d_desc_object_cache size_t size; }; -#define VKD3D_DESCRIPTOR_POOL_COUNT 6 - /* ID3D12Device */ struct d3d12_device { @@ -1536,8 +1572,7 @@ struct d3d12_device struct vkd3d_desc_object_cache view_desc_cache; struct vkd3d_desc_object_cache cbuffer_desc_cache; - VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; - unsigned int vk_pool_count; + unsigned int vk_pool_limits[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; -- 2.45.2