From 6d3321209413caa5f5a3d96bf3802552ee56602e Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 16 Apr 2024 12:05:29 +1000 Subject: [PATCH] Updated vkd3d to 7d6f0f2592a8aedf749c2dff36ea330e9ccb49d1. --- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 6 + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 528 +++--- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 135 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 158 ++ libs/vkd3d/libs/vkd3d-shader/fx.c | 150 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 104 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 77 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 495 ++++-- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 134 +- .../libs/vkd3d-shader/hlsl_constant_ops.c | 23 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 1451 ++++++++++------- libs/vkd3d/libs/vkd3d-shader/spirv.c | 35 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 35 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 25 + libs/vkd3d/libs/vkd3d/cache.c | 59 + libs/vkd3d/libs/vkd3d/device.c | 62 + libs/vkd3d/libs/vkd3d/resource.c | 12 + libs/vkd3d/libs/vkd3d/state.c | 28 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 7 + 20 files changed, 2329 insertions(+), 1196 deletions(-) create mode 100644 libs/vkd3d/libs/vkd3d/cache.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 448e9a0e61d..94e4833dc9a 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -30,6 +30,7 @@ SOURCES = \ libs/vkd3d-shader/spirv.c \ libs/vkd3d-shader/tpf.c \ libs/vkd3d-shader/vkd3d_shader_main.c \ + libs/vkd3d/cache.c \ libs/vkd3d/command.c \ libs/vkd3d/device.c \ libs/vkd3d/resource.c \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 1cc8ecc38f3..f9df47d339c 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -438,6 +438,12 @@ struct vkd3d_mutex #endif }; +#ifdef _WIN32 +#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}} +#else +#define VKD3D_MUTEX_INITIALIZER 
PTHREAD_MUTEX_INITIALIZER +#endif + static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) { #ifdef _WIN32 diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 2b5feb94103..459fdfc9abf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -330,37 +330,6 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_XOR ] = "xor", }; -static const struct -{ - enum vkd3d_shader_input_sysval_semantic sysval_semantic; - const char *sysval_name; -} -shader_input_sysval_semantic_names[] = -{ - {VKD3D_SIV_POSITION, "position"}, - {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, - {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, - {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, - {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, - {VKD3D_SIV_VERTEX_ID, "vertex_id"}, - {VKD3D_SIV_INSTANCE_ID, "instance_id"}, - {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, - {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, - {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, - {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, - {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, - {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, - {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, - {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, - {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, - {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, - {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, - {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, - {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, - {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, - {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, -}; - struct vkd3d_d3d_asm_colours { const char *reset; @@ -615,21 +584,54 @@ static void 
shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); } -static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_input_sysval_semantic semantic) +static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix) { unsigned int i; + static const struct + { + enum vkd3d_shader_input_sysval_semantic sysval_semantic; + const char *sysval_name; + } + shader_input_sysval_semantic_names[] = + { + {VKD3D_SIV_POSITION, "position"}, + {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, + {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, + {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, + {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, + {VKD3D_SIV_VERTEX_ID, "vertex_id"}, + {VKD3D_SIV_INSTANCE_ID, "instance_id"}, + {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, + {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, + {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, + {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, + {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, + {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, + {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, + {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, + {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, + {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, + {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, + {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, + {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, + }; + for (i = 0; i < 
ARRAY_SIZE(shader_input_sysval_semantic_names); ++i) { - if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic) - { - vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name); - return; - } + if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic) + continue; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", + prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix); + return; } - vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic); + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s", + prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix); } static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) @@ -704,124 +706,129 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil vkd3d_string_buffer_printf(&compiler->buffer, ")"); } -static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_semantic *semantic, uint32_t flags) +static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int usage_idx; + const char *usage; + bool indexed; if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) { switch (semantic->resource_type) { case VKD3D_SHADER_RESOURCE_TEXTURE_2D: - shader_addline(buffer, "_2d"); + usage = "2d"; break; - case VKD3D_SHADER_RESOURCE_TEXTURE_3D: - shader_addline(buffer, "_volume"); + usage = "volume"; break; - case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: - shader_addline(buffer, "_cube"); + usage = "cube"; break; - default: - shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", 
+ prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); + return; } - else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) + + if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) { + vkd3d_string_buffer_printf(buffer, "%s", prefix); if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) - shader_addline(buffer, "_resource"); + vkd3d_string_buffer_printf(buffer, "resource_"); - shader_addline(buffer, "_"); shader_dump_resource_type(compiler, semantic->resource_type); if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) { - shader_addline(buffer, "(%u)", semantic->sample_count); + vkd3d_string_buffer_printf(buffer, "(%u)", semantic->sample_count); } if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) shader_dump_uav_flags(compiler, flags); - shader_addline(buffer, " "); + vkd3d_string_buffer_printf(buffer, " "); shader_dump_resource_data_type(compiler, semantic->resource_data_type); + vkd3d_string_buffer_printf(buffer, "%s", suffix); + return; } - else - { - /* Pixel shaders 3.0 don't have usage semantics. 
*/ - if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) - && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) - return; - else - shader_addline(buffer, "_"); - - switch (semantic->usage) - { - case VKD3D_DECL_USAGE_POSITION: - shader_addline(buffer, "position%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_BLEND_INDICES: - shader_addline(buffer, "blend"); - break; - - case VKD3D_DECL_USAGE_BLEND_WEIGHT: - shader_addline(buffer, "weight"); - break; - - case VKD3D_DECL_USAGE_NORMAL: - shader_addline(buffer, "normal%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_PSIZE: - shader_addline(buffer, "psize"); - break; - - case VKD3D_DECL_USAGE_COLOR: - if (!semantic->usage_idx) - shader_addline(buffer, "color"); - else - shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); - break; - - case VKD3D_DECL_USAGE_TEXCOORD: - shader_addline(buffer, "texcoord%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_TANGENT: - shader_addline(buffer, "tangent"); - break; - - case VKD3D_DECL_USAGE_BINORMAL: - shader_addline(buffer, "binormal"); - break; - - case VKD3D_DECL_USAGE_TESS_FACTOR: - shader_addline(buffer, "tessfactor"); - break; - - case VKD3D_DECL_USAGE_POSITIONT: - shader_addline(buffer, "positionT%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_FOG: - shader_addline(buffer, "fog"); - break; - case VKD3D_DECL_USAGE_DEPTH: - shader_addline(buffer, "depth"); - break; + /* Pixel shaders 3.0 don't have usage semantics. 
*/ + if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) + && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) + return; - case VKD3D_DECL_USAGE_SAMPLE: - shader_addline(buffer, "sample"); + indexed = false; + usage_idx = semantic->usage_idx; + switch (semantic->usage) + { + case VKD3D_DECL_USAGE_POSITION: + usage = "position"; + indexed = true; + break; + case VKD3D_DECL_USAGE_BLEND_INDICES: + usage = "blend"; + break; + case VKD3D_DECL_USAGE_BLEND_WEIGHT: + usage = "weight"; + break; + case VKD3D_DECL_USAGE_NORMAL: + usage = "normal"; + indexed = true; + break; + case VKD3D_DECL_USAGE_PSIZE: + usage = "psize"; + break; + case VKD3D_DECL_USAGE_COLOR: + if (semantic->usage_idx) + { + usage = "specular"; + indexed = true; + --usage_idx; break; - - default: - shader_addline(buffer, "", semantic->usage); - FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); - } + } + usage = "color"; + break; + case VKD3D_DECL_USAGE_TEXCOORD: + usage = "texcoord"; + indexed = true; + break; + case VKD3D_DECL_USAGE_TANGENT: + usage = "tangent"; + break; + case VKD3D_DECL_USAGE_BINORMAL: + usage = "binormal"; + break; + case VKD3D_DECL_USAGE_TESS_FACTOR: + usage = "tessfactor"; + break; + case VKD3D_DECL_USAGE_POSITIONT: + usage = "positionT"; + indexed = true; + break; + case VKD3D_DECL_USAGE_FOG: + usage = "fog"; + break; + case VKD3D_DECL_USAGE_DEPTH: + usage = "depth"; + break; + case VKD3D_DECL_USAGE_SAMPLE: + usage = "sample"; + break; + default: + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix); + return; } + + if (indexed) + vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix); + else + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); } static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, @@ -937,8 +944,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler 
vkd3d_string_buffer_printf(&compiler->buffer, "*]"); } -static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg, - bool is_declaration) +static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, + const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; unsigned int offset = reg->idx[0].offset; @@ -947,22 +954,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; static const char * const misctype_reg_names[] = {"vPos", "vFace"}; - shader_addline(buffer, "%s", reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, + reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); switch (reg->type) { case VKD3DSPR_TEMP: - shader_addline(buffer, "r"); + vkd3d_string_buffer_printf(buffer, "r"); break; case VKD3DSPR_INPUT: - shader_addline(buffer, "v"); + vkd3d_string_buffer_printf(buffer, "v"); break; case VKD3DSPR_CONST: case VKD3DSPR_CONST2: case VKD3DSPR_CONST3: case VKD3DSPR_CONST4: - shader_addline(buffer, "c"); + vkd3d_string_buffer_printf(buffer, "c"); offset = shader_get_float_offset(reg->type, offset); break; @@ -972,205 +980,202 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const break; case VKD3DSPR_RASTOUT: - shader_addline(buffer, "%s", rastout_reg_names[offset]); + vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); break; case VKD3DSPR_COLOROUT: - shader_addline(buffer, "oC"); + vkd3d_string_buffer_printf(buffer, "oC"); break; case VKD3DSPR_DEPTHOUT: - shader_addline(buffer, "oDepth"); + vkd3d_string_buffer_printf(buffer, "oDepth"); break; case VKD3DSPR_DEPTHOUTGE: - shader_addline(buffer, "oDepthGE"); + 
vkd3d_string_buffer_printf(buffer, "oDepthGE"); break; case VKD3DSPR_DEPTHOUTLE: - shader_addline(buffer, "oDepthLE"); + vkd3d_string_buffer_printf(buffer, "oDepthLE"); break; case VKD3DSPR_ATTROUT: - shader_addline(buffer, "oD"); + vkd3d_string_buffer_printf(buffer, "oD"); break; case VKD3DSPR_TEXCRDOUT: /* Vertex shaders >= 3.0 use general purpose output registers * (VKD3DSPR_OUTPUT), which can include an address token. */ if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) - shader_addline(buffer, "o"); + vkd3d_string_buffer_printf(buffer, "o"); else - shader_addline(buffer, "oT"); + vkd3d_string_buffer_printf(buffer, "oT"); break; case VKD3DSPR_CONSTINT: - shader_addline(buffer, "i"); + vkd3d_string_buffer_printf(buffer, "i"); break; case VKD3DSPR_CONSTBOOL: - shader_addline(buffer, "b"); + vkd3d_string_buffer_printf(buffer, "b"); break; case VKD3DSPR_LABEL: - shader_addline(buffer, "l"); + vkd3d_string_buffer_printf(buffer, "l"); break; case VKD3DSPR_LOOP: - shader_addline(buffer, "aL"); + vkd3d_string_buffer_printf(buffer, "aL"); break; case VKD3DSPR_COMBINED_SAMPLER: case VKD3DSPR_SAMPLER: - shader_addline(buffer, "s"); + vkd3d_string_buffer_printf(buffer, "s"); is_descriptor = true; break; case VKD3DSPR_MISCTYPE: if (offset > 1) - { - FIXME("Unhandled misctype register %u.\n", offset); - shader_addline(buffer, "", offset); - } + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, offset, compiler->colours.reset); else - { - shader_addline(buffer, "%s", misctype_reg_names[offset]); - } + vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); break; case VKD3DSPR_PREDICATE: - shader_addline(buffer, "p"); + vkd3d_string_buffer_printf(buffer, "p"); break; case VKD3DSPR_IMMCONST: - shader_addline(buffer, "l"); + vkd3d_string_buffer_printf(buffer, "l"); break; case VKD3DSPR_IMMCONST64: - shader_addline(buffer, "d"); + vkd3d_string_buffer_printf(buffer, "d"); break; case VKD3DSPR_CONSTBUFFER: - shader_addline(buffer, "cb"); + 
vkd3d_string_buffer_printf(buffer, "cb"); is_descriptor = true; break; case VKD3DSPR_IMMCONSTBUFFER: - shader_addline(buffer, "icb"); + vkd3d_string_buffer_printf(buffer, "icb"); break; case VKD3DSPR_PRIMID: - shader_addline(buffer, "primID"); + vkd3d_string_buffer_printf(buffer, "primID"); break; case VKD3DSPR_NULL: - shader_addline(buffer, "null"); + vkd3d_string_buffer_printf(buffer, "null"); break; case VKD3DSPR_RASTERIZER: - shader_addline(buffer, "rasterizer"); + vkd3d_string_buffer_printf(buffer, "rasterizer"); break; case VKD3DSPR_RESOURCE: - shader_addline(buffer, "t"); + vkd3d_string_buffer_printf(buffer, "t"); is_descriptor = true; break; case VKD3DSPR_UAV: - shader_addline(buffer, "u"); + vkd3d_string_buffer_printf(buffer, "u"); is_descriptor = true; break; case VKD3DSPR_OUTPOINTID: - shader_addline(buffer, "vOutputControlPointID"); + vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); break; case VKD3DSPR_FORKINSTID: - shader_addline(buffer, "vForkInstanceId"); + vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); break; case VKD3DSPR_JOININSTID: - shader_addline(buffer, "vJoinInstanceId"); + vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); break; case VKD3DSPR_INCONTROLPOINT: - shader_addline(buffer, "vicp"); + vkd3d_string_buffer_printf(buffer, "vicp"); break; case VKD3DSPR_OUTCONTROLPOINT: - shader_addline(buffer, "vocp"); + vkd3d_string_buffer_printf(buffer, "vocp"); break; case VKD3DSPR_PATCHCONST: - shader_addline(buffer, "vpc"); + vkd3d_string_buffer_printf(buffer, "vpc"); break; case VKD3DSPR_TESSCOORD: - shader_addline(buffer, "vDomainLocation"); + vkd3d_string_buffer_printf(buffer, "vDomainLocation"); break; case VKD3DSPR_GROUPSHAREDMEM: - shader_addline(buffer, "g"); + vkd3d_string_buffer_printf(buffer, "g"); break; case VKD3DSPR_THREADID: - shader_addline(buffer, "vThreadID"); + vkd3d_string_buffer_printf(buffer, "vThreadID"); break; case VKD3DSPR_THREADGROUPID: - shader_addline(buffer, "vThreadGroupID"); + 
vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); break; case VKD3DSPR_LOCALTHREADID: - shader_addline(buffer, "vThreadIDInGroup"); + vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); break; case VKD3DSPR_LOCALTHREADINDEX: - shader_addline(buffer, "vThreadIDInGroupFlattened"); + vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); break; case VKD3DSPR_IDXTEMP: - shader_addline(buffer, "x"); + vkd3d_string_buffer_printf(buffer, "x"); break; case VKD3DSPR_STREAM: - shader_addline(buffer, "m"); + vkd3d_string_buffer_printf(buffer, "m"); break; case VKD3DSPR_FUNCTIONBODY: - shader_addline(buffer, "fb"); + vkd3d_string_buffer_printf(buffer, "fb"); break; case VKD3DSPR_FUNCTIONPOINTER: - shader_addline(buffer, "fp"); + vkd3d_string_buffer_printf(buffer, "fp"); break; case VKD3DSPR_COVERAGE: - shader_addline(buffer, "vCoverage"); + vkd3d_string_buffer_printf(buffer, "vCoverage"); break; case VKD3DSPR_SAMPLEMASK: - shader_addline(buffer, "oMask"); + vkd3d_string_buffer_printf(buffer, "oMask"); break; case VKD3DSPR_GSINSTID: - shader_addline(buffer, "vGSInstanceID"); + vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); break; case VKD3DSPR_OUTSTENCILREF: - shader_addline(buffer, "oStencilRef"); + vkd3d_string_buffer_printf(buffer, "oStencilRef"); break; case VKD3DSPR_UNDEF: - shader_addline(buffer, "undef"); + vkd3d_string_buffer_printf(buffer, "undef"); break; case VKD3DSPR_SSA: - shader_addline(buffer, "sr"); + vkd3d_string_buffer_printf(buffer, "sr"); break; default: - shader_addline(buffer, "", reg->type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->type, compiler->colours.reset); break; } @@ -1189,7 +1194,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const break; } - shader_addline(buffer, "%s(", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); switch (reg->dimension) { case VSIR_DIMENSION_SCALAR: @@ -1210,7 +1215,8 @@ static void 
shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); break; default: - shader_addline(buffer, "", reg->data_type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->data_type, compiler->colours.reset); break; } break; @@ -1249,20 +1255,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); break; default: - shader_addline(buffer, "", reg->data_type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->data_type, compiler->colours.reset); break; } break; default: - shader_addline(buffer, "", reg->dimension); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->dimension, compiler->colours.reset); break; } - shader_addline(buffer, ")"); + vkd3d_string_buffer_printf(buffer, ")"); } else if (reg->type == VKD3DSPR_IMMCONST64) { - shader_addline(buffer, "%s(", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); /* A double2 vector is treated as a float4 vector in enum vsir_dimension. 
*/ if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) { @@ -1280,14 +1288,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { - shader_addline(buffer, "", reg->data_type); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->data_type, compiler->colours.reset); } } else { - shader_addline(buffer, "", reg->dimension); + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->dimension, compiler->colours.reset); } - shader_addline(buffer, ")"); + vkd3d_string_buffer_printf(buffer, ")"); } else if (reg->type != VKD3DSPR_RASTOUT && reg->type != VKD3DSPR_MISCTYPE @@ -1331,7 +1341,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { - shader_addline(buffer, "%s", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); } if (reg->type == VKD3DSPR_FUNCTIONPOINTER) @@ -1339,8 +1349,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { - shader_addline(buffer, "%s", compiler->colours.reset); + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); } + vkd3d_string_buffer_printf(buffer, "%s", suffix); } static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) @@ -1384,8 +1395,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co compiler->colours.modifier, compiler->colours.reset); } -static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_register *reg) +static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) { static const char *dimensions[] = { @@ -1398,7 +1409,10 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, const char *dimension; if (!(compiler->flags & 
VSIR_ASM_FLAG_DUMP_TYPES)) + { + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix); return; + } if (reg->data_type == VKD3D_DATA_UNUSED) return; @@ -1408,9 +1422,9 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, else dimension = "??"; - shader_addline(buffer, " <%s", dimension); + vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); shader_dump_data_type(compiler, reg->data_type); - shader_addline(buffer, ">"); + vkd3d_string_buffer_printf(buffer, ">%s", suffix); } static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, @@ -1444,8 +1458,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, { uint32_t write_mask = param->write_mask; - vkd3d_string_buffer_printf(&compiler->buffer, "%s", prefix); - shader_dump_register(compiler, ¶m->reg, is_declaration); + shader_print_register(compiler, prefix, ¶m->reg, is_declaration, ""); if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) { @@ -1457,8 +1470,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, shader_print_precision(compiler, ¶m->reg); shader_print_non_uniform(compiler, ¶m->reg); - shader_dump_reg_type(compiler, ¶m->reg); - vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffix); + shader_print_reg_type(compiler, "", ¶m->reg, suffix); } static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, @@ -1468,6 +1480,7 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, struct vkd3d_string_buffer *buffer = &compiler->buffer; uint32_t swizzle = param->swizzle; const char *modifier = ""; + bool is_abs = false; if (src_modifier == VKD3DSPSM_NEG || src_modifier == VKD3DSPSM_BIASNEG @@ -1482,9 +1495,9 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) - 
vkd3d_string_buffer_printf(buffer, "|"); + is_abs = true; - shader_dump_register(compiler, ¶m->reg, false); + shader_print_register(compiler, is_abs ? "|" : "", ¶m->reg, false, ""); switch (src_modifier) { @@ -1543,13 +1556,12 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); } - if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) + if (is_abs) vkd3d_string_buffer_printf(buffer, "|"); shader_print_precision(compiler, ¶m->reg); shader_print_non_uniform(compiler, ¶m->reg); - shader_dump_reg_type(compiler, ¶m->reg); - vkd3d_string_buffer_printf(buffer, "%s", suffix); + shader_print_reg_type(compiler, "", ¶m->reg, suffix); } static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, @@ -1578,87 +1590,93 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); } -static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_primitive_type *primitive_type) +static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *primitive_type; - switch (primitive_type->type) + switch (p->type) { case VKD3D_PT_UNDEFINED: - shader_addline(buffer, "undefined"); + primitive_type = "undefined"; break; case VKD3D_PT_POINTLIST: - shader_addline(buffer, "pointlist"); + primitive_type = "pointlist"; break; case VKD3D_PT_LINELIST: - shader_addline(buffer, "linelist"); + primitive_type = "linelist"; break; case VKD3D_PT_LINESTRIP: - shader_addline(buffer, "linestrip"); + primitive_type = "linestrip"; break; case VKD3D_PT_TRIANGLELIST: - shader_addline(buffer, "trianglelist"); + primitive_type = "trianglelist"; break; case VKD3D_PT_TRIANGLESTRIP: - 
shader_addline(buffer, "trianglestrip"); + primitive_type = "trianglestrip"; break; case VKD3D_PT_TRIANGLEFAN: - shader_addline(buffer, "trianglefan"); + primitive_type = "trianglefan"; break; case VKD3D_PT_LINELIST_ADJ: - shader_addline(buffer, "linelist_adj"); + primitive_type = "linelist_adj"; break; case VKD3D_PT_LINESTRIP_ADJ: - shader_addline(buffer, "linestrip_adj"); + primitive_type = "linestrip_adj"; break; case VKD3D_PT_TRIANGLELIST_ADJ: - shader_addline(buffer, "trianglelist_adj"); + primitive_type = "trianglelist_adj"; break; case VKD3D_PT_TRIANGLESTRIP_ADJ: - shader_addline(buffer, "trianglestrip_adj"); + primitive_type = "trianglestrip_adj"; break; case VKD3D_PT_PATCH: - shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); - break; + vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix); + return; default: - shader_addline(buffer, "", primitive_type->type); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix); + return; } + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix); } -static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_interpolation_mode interpolation_mode) +static void shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *mode; - switch (interpolation_mode) + switch (m) { case VKD3DSIM_CONSTANT: - shader_addline(buffer, "constant"); + mode = "constant"; break; case VKD3DSIM_LINEAR: - shader_addline(buffer, "linear"); + mode = "linear"; break; case VKD3DSIM_LINEAR_CENTROID: - shader_addline(buffer, "linear centroid"); + mode = "linear centroid"; break; case VKD3DSIM_LINEAR_NOPERSPECTIVE: - shader_addline(buffer, "linear noperspective"); + mode = "linear 
noperspective"; break; case VKD3DSIM_LINEAR_SAMPLE: - shader_addline(buffer, "linear sample"); + mode = "linear sample"; break; case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: - shader_addline(buffer, "linear noperspective centroid"); + mode = "linear noperspective centroid"; break; case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: - shader_addline(buffer, "linear noperspective sample"); + mode = "linear noperspective sample"; break; default: - shader_addline(buffer, "", interpolation_mode); - break; + vkd3d_string_buffer_printf(buffer, "%s%s%s%s", + prefix, compiler->colours.error, m, compiler->colours.reset, suffix); + return; } + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix); } const char *shader_get_type_prefix(enum vkd3d_shader_type type) @@ -1849,16 +1867,15 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL: case VKD3DSIH_DCL_UAV_TYPED: vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); - shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); + shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); - vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); - shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); break; case VKD3DSIH_DCL_CONSTANT_BUFFER: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); + shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) shader_print_subscript(compiler, ins->declaration.cb.size, NULL); else if 
(vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) @@ -1906,8 +1923,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_INPUT_PS: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_interpolation_mode(compiler, ins->flags); + shader_print_interpolation_mode(compiler, " ", ins->flags, ""); shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); break; @@ -1916,16 +1932,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_INPUT_SIV: case VKD3DSIH_DCL_OUTPUT_SIV: shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); - shader_addline(buffer, ", "); - shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); + shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); break; case VKD3DSIH_DCL_INPUT_PS_SIV: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_interpolation_mode(compiler, ins->flags); + shader_print_interpolation_mode(compiler, " ", ins->flags, ""); shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); - shader_addline(buffer, ", "); - shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); + shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); break; case VKD3DSIH_DCL_INPUT: @@ -1935,8 +1948,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_INPUT_PRIMITIVE: case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); + shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, ""); break; case VKD3DSIH_DCL_INTERFACE: @@ -1958,10 +1970,8 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler 
*compiler, break; case VKD3DSIH_DCL_SAMPLER: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); - if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) - shader_addline(buffer, ", comparisonMode"); + shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, + ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : ""); shader_dump_register_space(compiler, ins->declaration.sampler.range.space); break; @@ -2354,6 +2364,10 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, break; case VKD3DSIH_LABEL: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: indent = 0; break; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 099729fbb6c..4685afa082d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1522,63 +1522,94 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) { - switch (type->base_type) + switch (type->class) { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3DXPT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + switch (type->base_type) { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + /* Actually double behaves differently depending on DLL version: + * For <= 
36, it maps to D3DXPT_FLOAT. + * For 37-40, it maps to zero (D3DXPT_VOID). + * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* + * values are mostly compatible with D3DXPT_*). + * However, the latter two cases look like bugs, and a reasonable + * application certainly wouldn't know what to do with them. + * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ + case HLSL_TYPE_DOUBLE: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); vkd3d_unreachable(); } - break; - case HLSL_TYPE_STRING: - return D3DXPT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) + + case HLSL_CLASS_OBJECT: + switch (type->base_type) { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + 
ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); vkd3d_unreachable(); } - break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3DXPT_VOID; - default: vkd3d_unreachable(); + + case HLSL_CLASS_ARRAY: + return hlsl_sm1_base_type(type->e.array.type); + + case HLSL_CLASS_STRUCT: + return D3DXPT_VOID; } + + vkd3d_unreachable(); } static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) @@ -2572,19 +2603,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b { if (instr->data_type) { - if (instr->data_type->class == HLSL_CLASS_MATRIX) + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); - continue; - } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); break; } - - assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); } switch (instr->type) diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index b5a61d99d3f..da8ba662dbc 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -404,6 +404,7 @@ enum dx_intrinsic_opcode DX_ATOMIC_BINOP = 78, DX_ATOMIC_CMP_XCHG = 79, DX_BARRIER = 80, + DX_CALCULATE_LOD = 81, DX_DISCARD = 82, DX_DERIV_COARSEX = 83, DX_DERIV_COARSEY = 84, @@ -2885,6 +2886,122 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co return VKD3D_OK; } +static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record, + struct sm6_value *dst) +{ + const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type; + struct sm6_value *operands[3]; + unsigned int i, j, offset; + uint64_t value; + + i = 0; + pointee_type = (record->operand_count & 1) ? 
sm6_parser_get_type(sm6, record->operands[i++]) : NULL; + + if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6)) + return VKD3D_ERROR_INVALID_SHADER; + + for (j = 0; i < record->operand_count; i += 2, ++j) + { + if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i]))) + return VKD3D_ERROR_INVALID_SHADER; + + if ((value = record->operands[i + 1]) >= sm6->cur_max_value) + { + WARN("Invalid value index %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid value index %"PRIu64".", value); + return VKD3D_ERROR_INVALID_SHADER; + } + else if (value == sm6->value_count) + { + WARN("Invalid value self-reference at %"PRIu64".\n", value); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Invalid value self-reference for a constexpr GEP."); + return VKD3D_ERROR_INVALID_SHADER; + } + + operands[j] = &sm6->values[value]; + if (value > sm6->value_count) + { + operands[j]->type = elem_type; + } + else if (operands[j]->type != elem_type) + { + WARN("Type mismatch.\n"); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, + "Type mismatch in constexpr GEP elements."); + } + } + + if (operands[0]->u.reg.idx_count > 1) + { + WARN("Unsupported stacked GEP.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A GEP instruction on the result of a previous GEP is unsupported."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!sm6_value_is_constant_zero(operands[1])) + { + WARN("Expected constant zero.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The pointer dereference index for a constexpr GEP instruction is not constant zero."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) + { + WARN("Element index is not constant int.\n"); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A constexpr GEP element index is not a constant integer."); + return VKD3D_ERROR_INVALID_SHADER; + } + + dst->structure_stride = operands[0]->structure_stride; + + ptr_type = operands[0]->type; + if (!sm6_type_is_pointer(ptr_type)) + { + WARN("Constexpr GEP base value is not a pointer.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A constexpr GEP base value is not a pointer."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!pointee_type) + { + pointee_type = ptr_type->u.pointer.type; + } + else if (pointee_type != ptr_type->u.pointer.type) + { + WARN("Explicit pointee type mismatch.\n"); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, + "Explicit pointee type for constexpr GEP does not match the element type."); + } + + offset = sm6_value_get_constant_uint(operands[2]); + if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) + { + WARN("Failed to get element type.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Failed to get the element type of a constexpr GEP."); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6))) + { + WARN("Failed to get pointer type for type %u.\n", gep_type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Module does not define a pointer type for a constexpr GEP result."); + return VKD3D_ERROR_INVALID_SHADER; + } + dst->u.reg = operands[0]->u.reg; + dst->u.reg.idx[1].offset = offset; + dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; + dst->u.reg.idx_count = 2; + + return VKD3D_OK; +} + static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) { enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; @@ -3005,6 +3122,12 @@ static enum vkd3d_result 
sm6_parser_constants_init(struct sm6_parser *sm6, const break; + case CST_CODE_CE_GEP: + case CST_CODE_CE_INBOUNDS_GEP: + if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) + return ret; + break; + case CST_CODE_UNDEF: dxil_record_validate_operand_max_count(record, 0, sm6); dst->u.reg.type = VKD3DSPR_UNDEF; @@ -4364,6 +4487,40 @@ static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enu instruction_dst_param_init_ssa_scalar(ins, sm6); } +static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + const struct sm6_value *resource, *sampler; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_register coord; + unsigned int clamp; + + resource = operands[0]; + sampler = operands[1]; + if (!sm6_value_validate_is_texture_handle(resource, op, sm6) + || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) + { + return; + } + + if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord)) + return; + + clamp = sm6_value_get_constant_uint(operands[5]); + + ins = state->ins; + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + src_param_init_vector_from_reg(&src_params[0], &coord); + src_params[1].reg = resource->u.handle.reg; + src_param_init_scalar(&src_params[1], !clamp); + src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -5392,6 +5549,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, [DX_BUFFER_STORE ] = {"v", "Hiiooooc", 
sm6_parser_emit_dx_buffer_store}, [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, + [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod}, [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 466908cd82b..1d90cd70e03 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -115,6 +115,9 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { + if (var->state_block_count) + hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); + fx->ops->write_pass(var, fx); } @@ -401,14 +404,6 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co uint32_t name_offset, offset, size, stride, numeric_desc; uint32_t elements_count = 0; const char *name; - static const uint32_t variable_type[] = - { - [HLSL_CLASS_SCALAR] = 1, - [HLSL_CLASS_VECTOR] = 1, - [HLSL_CLASS_MATRIX] = 1, - [HLSL_CLASS_OBJECT] = 2, - [HLSL_CLASS_STRUCT] = 3, - }; struct hlsl_ctx *ctx = fx->ctx; /* Resolve arrays to element type and number of elements. 
*/ @@ -428,13 +423,19 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: + put_u32_unaligned(buffer, 1); + break; + case HLSL_CLASS_OBJECT: + put_u32_unaligned(buffer, 2); + break; + case HLSL_CLASS_STRUCT: - put_u32_unaligned(buffer, variable_type[type->class]); + put_u32_unaligned(buffer, 3); break; - default: - hlsl_fixme(ctx, &ctx->location, "Writing type class %u is not implemented.", type->class); - return 0; + + case HLSL_CLASS_ARRAY: + vkd3d_unreachable(); } size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); @@ -630,7 +631,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n { struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; uint32_t semantic_offset, offset, elements_count = 0, name_offset; - struct hlsl_ctx *ctx = fx->ctx; size_t i; /* Resolve arrays to element type and number of elements. */ @@ -643,22 +643,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n name_offset = write_string(name, fx); semantic_offset = write_string(semantic->name, fx); - switch (type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_VOID: - case HLSL_TYPE_TEXTURE: - break; - default: - hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.", - type->base_type); - return 0; - }; - offset = put_u32(buffer, hlsl_sm1_base_type(type)); put_u32(buffer, hlsl_sm1_class(type)); put_u32(buffer, name_offset); @@ -688,6 +672,9 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n for (i = 0; i < type->e.record.field_count; ++i) { const struct hlsl_struct_field *field = &type->e.record.fields[i]; + + /* Validated in check_invalid_object_fields(). 
*/ + assert(hlsl_is_numeric_type(field->type)); write_fx_2_parameter(field->type, field->name, &field->semantic, fx); } } @@ -746,7 +733,7 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f { struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; const struct hlsl_type *type = var->data_type; - uint32_t i, offset, size, elements_count = 1; + uint32_t offset, size, elements_count = 1; size = get_fx_2_type_size(type); @@ -756,63 +743,80 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f type = hlsl_get_multiarray_element_type(type); } - if (type->class == HLSL_CLASS_OBJECT) - { - /* Objects are given sequential ids. */ - offset = put_u32(buffer, fx->object_variable_count++); - for (i = 1; i < elements_count; ++i) - put_u32(buffer, fx->object_variable_count++); - } - else + /* Note that struct fields must all be numeric; + * this was validated in check_invalid_object_fields(). */ + switch (type->class) { - /* FIXME: write actual initial value */ - offset = put_u32(buffer, 0); + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + /* FIXME: write actual initial value */ + offset = put_u32(buffer, 0); - for (i = 1; i < size / sizeof(uint32_t); ++i) - put_u32(buffer, 0); + for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) + put_u32(buffer, 0); + break; + + default: + /* Objects are given sequential ids. 
*/ + offset = put_u32(buffer, fx->object_variable_count++); + for (uint32_t i = 1; i < elements_count; ++i) + put_u32(buffer, fx->object_variable_count++); + break; } return offset; } -static bool is_type_supported_fx_2(const struct hlsl_type *type) +static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type, + const struct vkd3d_shader_location *loc) { - type = hlsl_get_multiarray_element_type(type); - - if (type->class == HLSL_CLASS_STRUCT) - return true; - - switch (type->base_type) + switch (type->class) { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - case HLSL_TYPE_DOUBLE: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_VERTEXSHADER: - case HLSL_TYPE_STRING: + case HLSL_CLASS_STRUCT: + /* Note that the fields must all be numeric; this was validated in + * check_invalid_object_fields(). */ return true; - case HLSL_TYPE_TEXTURE: - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) + + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + return true; + + case HLSL_CLASS_ARRAY: + return is_type_supported_fx_2(ctx, type->e.array.type, loc); + + case HLSL_CLASS_OBJECT: + switch (type->base_type) { - case HLSL_SAMPLER_DIM_1D: - case HLSL_SAMPLER_DIM_2D: - case HLSL_SAMPLER_DIM_3D: - case HLSL_SAMPLER_DIM_CUBE: - case HLSL_SAMPLER_DIM_GENERIC: - return true; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_2D: + case HLSL_SAMPLER_DIM_3D: + case HLSL_SAMPLER_DIM_CUBE: + case HLSL_SAMPLER_DIM_GENERIC: + return true; + default: + return false; + } + break; + + case HLSL_TYPE_SAMPLER: + case HLSL_TYPE_STRING: + case HLSL_TYPE_PIXELSHADER: + case HLSL_TYPE_VERTEXSHADER: + hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type); + return false; + default: - ; + return false; } - break; - default: - return false; } - return false; + vkd3d_unreachable(); } static void 
write_fx_2_parameters(struct fx_write_context *fx) @@ -828,7 +832,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!is_type_supported_fx_2(var->data_type)) + if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) continue; desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 5638a03a8f5..ed80e2b75c8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -134,14 +134,39 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) return hlsl_get_var(scope->upper, name); } -void hlsl_free_var(struct hlsl_ir_var *decl) +static void free_state_block_entry(struct hlsl_state_block_entry *entry) +{ + vkd3d_free(entry->name); + vkd3d_free(entry->args); + hlsl_block_cleanup(entry->instrs); + vkd3d_free(entry->instrs); + vkd3d_free(entry); +} + +void hlsl_free_state_block(struct hlsl_state_block *state_block) { unsigned int k; + assert(state_block); + for (k = 0; k < state_block->count; ++k) + free_state_block_entry(state_block->entries[k]); + vkd3d_free(state_block->entries); + vkd3d_free(state_block); +} + +void hlsl_free_var(struct hlsl_ir_var *decl) +{ + unsigned int k, i; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) vkd3d_free((void *)decl->objects_usage[k]); + + for (i = 0; i < decl->state_block_count; ++i) + hlsl_free_state_block(decl->state_blocks[i]); + vkd3d_free(decl->state_blocks); + vkd3d_free(decl); } @@ -1561,6 +1586,27 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned return &swizzle->node; } +struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, + struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_stateblock_constant 
*constant; + struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); + + if (!(constant = hlsl_alloc(ctx, sizeof(*constant)))) + return NULL; + + init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc); + + if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1))) + { + vkd3d_free(constant); + return NULL; + } + strcpy(constant->name, name); + + return &constant->node; +} + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) { struct hlsl_type *type = index->val.node->data_type; @@ -1570,7 +1616,10 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) { - return index->val.node->data_type->class == HLSL_CLASS_OBJECT; + const struct hlsl_type *type = index->val.node->data_type; + + return type->class == HLSL_CLASS_OBJECT + && (type->base_type == HLSL_TYPE_TEXTURE || type->base_type == HLSL_TYPE_UAV); } bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) @@ -1881,6 +1930,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } +static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) +{ + return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); +} + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) { hlsl_block_cleanup(&c->body); @@ -1976,6 +2031,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + + case HLSL_IR_STATEBLOCK_CONSTANT: + return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); } vkd3d_unreachable(); @@ -2631,7 +2689,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP3_CMP] = "cmp", [HLSL_OP3_DP2ADD] = "dp2add", - [HLSL_OP3_MOVC] = "movc", [HLSL_OP3_TERNARY] = "ternary", }; @@ -2808,6 +2865,12 @@ static void 
dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); } +static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_stateblock_constant *constant) +{ + vkd3d_string_buffer_printf(buffer, "%s", constant->name); +} + static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) { struct hlsl_ir_switch_case *c; @@ -2896,6 +2959,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_SWIZZLE: dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break; + + case HLSL_IR_STATEBLOCK_CONSTANT: + dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); + break; } } @@ -3068,6 +3135,12 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); } +static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) +{ + vkd3d_free(constant->name); + vkd3d_free(constant); +} + void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); @@ -3125,6 +3198,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_SWITCH: free_ir_switch(hlsl_ir_switch(node)); break; + + case HLSL_IR_STATEBLOCK_CONSTANT: + free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); + break; } } @@ -3290,7 +3367,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, + {"cs_5_1", VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false}, {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, + {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false}, {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, @@ -3298,7 +3377,9 @@ const 
struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, + {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false}, {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, + {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false}, {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, @@ -3326,6 +3407,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, + {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false}, {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, @@ -3347,6 +3429,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, + {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false}, }; for (i = 0; i < ARRAY_SIZE(profiles); ++i) @@ -3651,6 +3734,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) rb_destroy(&ctx->functions, free_function_rb, NULL); + /* State blocks must be free before the variables, because they contain instructions that may + * refer to them. 
*/ + LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + for (i = 0; i < var->state_block_count; ++i) + hlsl_free_state_block(var->state_blocks[i]); + vkd3d_free(var->state_blocks); + var->state_blocks = NULL; + var->state_block_count = 0; + var->state_block_capacity = 0; + } + } + LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) { LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index aa9cb14fc8d..4225098bc87 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -298,6 +298,7 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + HLSL_IR_STATEBLOCK_CONSTANT, }; /* Common data for every type of IR instruction node. */ @@ -423,6 +424,14 @@ struct hlsl_ir_var /* Scope that contains annotations for this variable. */ struct hlsl_scope *annotations; + /* A dynamic array containing the state block on the variable's declaration, if any. + * An array variable may contain multiple state blocks. + * A technique pass will always contain one. + * These are only really used for effect profiles. */ + struct hlsl_state_block **state_blocks; + unsigned int state_block_count; + size_t state_block_capacity; + /* Indexes of the IR instructions where the variable is first written and last read (liveness * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 * means function entry. 
*/ @@ -458,6 +467,38 @@ struct hlsl_ir_var uint32_t is_separated_resource : 1; }; +/* This struct is used to represent assignments in state block entries: + * name = {args[0], args[1], ...}; + * - or - + * name = args[0] + * - or - + * name[lhs_index] = args[0] + * - or - + * name[lhs_index] = {args[0], args[1], ...}; + */ +struct hlsl_state_block_entry +{ + /* For assignments, the name in the lhs. */ + char *name; + + /* Whether the lhs in the assignment is indexed and, in that case, its index. */ + bool lhs_has_index; + unsigned int lhs_index; + + /* Instructions present in the rhs. */ + struct hlsl_block *instrs; + + /* For assignments, arguments of the rhs initializer. */ + struct hlsl_ir_node **args; + unsigned int args_count; +}; + +struct hlsl_state_block +{ + struct hlsl_state_block_entry **entries; + size_t count, capacity; +}; + /* Sized array of variables representing a function's parameters. */ struct hlsl_func_parameters { @@ -601,14 +642,9 @@ enum hlsl_ir_expr_op /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, * then adds c. */ HLSL_OP3_DP2ADD, - /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. - * TERNARY(a, b, c) returns c if a == 0 and b otherwise. - * They differ for floating point numbers, because - * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b - if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while - SM4+ is using MOVC in such cases. */ + /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. + * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ HLSL_OP3_CMP, - HLSL_OP3_MOVC, HLSL_OP3_TERNARY, }; @@ -754,6 +790,14 @@ struct hlsl_ir_constant struct hlsl_reg reg; }; +/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, + * that do not concern regular pixel, vertex, or compute shaders, except for parsing. 
*/ +struct hlsl_ir_stateblock_constant +{ + struct hlsl_ir_node node; + char *name; +}; + struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. */ @@ -932,6 +976,16 @@ struct hlsl_ctx bool warn_implicit_truncation; }; +static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +} + +static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return !hlsl_version_ge(ctx, major, minor); +} + struct hlsl_resource_load_params { struct hlsl_type *format; @@ -1019,6 +1073,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } +static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) +{ + assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); + return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); +} + static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -1211,6 +1271,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); void hlsl_free_attribute(struct hlsl_attribute *attr); void hlsl_free_instr(struct hlsl_ir_node *node); void hlsl_free_instr_list(struct list *list); +void hlsl_free_state_block(struct hlsl_state_block *state_block); void hlsl_free_type(struct hlsl_type *type); void hlsl_free_var(struct hlsl_ir_var *decl); @@ -1292,6 +1353,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char 
*name, + struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index e02e0c540f9..8f71556757a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -77,6 +77,10 @@ struct parse_variable_def struct hlsl_type *basic_type; uint32_t modifiers; struct vkd3d_shader_location modifiers_loc; + + struct hlsl_state_block **state_blocks; + unsigned int state_block_count; + size_t state_block_capacity; }; struct parse_function @@ -114,6 +118,12 @@ struct parse_attribute_list const struct hlsl_attribute **attrs; }; +struct state_block_index +{ + bool has_index; + unsigned int index; +}; + } %code provides @@ -931,24 +941,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) vkd3d_free(v->arrays.sizes); vkd3d_free(v->name); hlsl_cleanup_semantic(&v->semantic); + assert(!v->state_blocks); vkd3d_free(v); } -static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) -{ - return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; -} - -static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -{ - return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -} - -static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -{ - return !shader_profile_version_ge(ctx, major, minor); -} - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct hlsl_type *type, uint32_t modifiers, struct list *defs) { @@ -971,7 +967,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = type; - 
if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -1121,7 +1117,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters } static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, - const struct vkd3d_shader_location *loc) + struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; struct hlsl_type *type; @@ -1131,6 +1127,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * return false; var->annotations = annotations; + var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); + var->state_blocks[0] = state_block; + var->state_block_count = 1; + var->state_block_capacity = 1; + if (!hlsl_add_var(ctx, var, false)) { struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); @@ -1216,7 +1217,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const struct hlsl_reg_reservation reservation = {0}; char *endptr; - if (shader_profile_version_lt(ctx, 4, 0)) + if (hlsl_version_lt(ctx, 4, 0)) return reservation; reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); @@ -1299,6 +1300,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str case HLSL_IR_RESOURCE_STORE: case HLSL_IR_STORE: case HLSL_IR_SWITCH: + case HLSL_IR_STATEBLOCK_CONSTANT: hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); } @@ -2177,7 +2179,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) type = basic_type; - if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= 
(v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -2362,8 +2364,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var free_parse_variable_def(v); continue; } + type = var->data_type; + var->state_blocks = v->state_blocks; + var->state_block_count = v->state_block_count; + var->state_block_capacity = v->state_block_capacity; + v->state_block_count = 0; + v->state_block_capacity = 0; + v->state_blocks = NULL; + + if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u state blocks, but got %u.", + hlsl_type_component_count(type), var->state_block_count); + free_parse_variable_def(v); + continue; + } + if (v->initializer.args_count) { if (v->initializer.braces) @@ -2663,12 +2682,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { + enum hlsl_base_type base_type; struct hlsl_type *type; if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + base_type = type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; + type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); return convert_args(ctx, params, type, loc); } @@ -2728,81 +2749,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, false); } -static bool intrinsic_all(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +/* Find the type corresponding to the given source type, with the same + * dimensions but a different base type. 
*/ +static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) { - struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + +static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, + struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *res, *load; unsigned int i, count; - if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, one); + count = hlsl_type_component_count(arg->data_type); - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) return false; - hlsl_block_add_instr(params->instrs, zero); - mul = one; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) + for (i = 1; i < count; ++i) { if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false; - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) - return false; + if (!(res = hlsl_new_binary_expr(ctx, op, res, load))) + return NULL; + hlsl_block_add_instr(params->instrs, res); } - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); + return true; } -static bool intrinsic_any(struct hlsl_ctx *ctx, +static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; - unsigned int i, count; + struct hlsl_ir_node *arg = params->args[0], *cast; + struct hlsl_type *bool_type; - if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) - { - hlsl_fixme(ctx, loc, "any() implementation for 
non-vector, non-scalar"); + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); + if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) return false; - } - - if (arg->data_type->base_type == HLSL_TYPE_FLOAT) - { - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); - - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); - } - else if (arg->data_type->base_type == HLSL_TYPE_BOOL) - { - if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) - return false; - hlsl_block_add_instr(params->instrs, bfalse); - - or = bfalse; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { - if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; + return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); +} - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) - return false; - } +static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg = params->args[0], *cast; + struct hlsl_type *bool_type; - return true; - } + bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); + if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) + return false; - hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); - return false; + return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); } static bool intrinsic_asin(struct hlsl_ctx *ctx, @@ -2870,20 +2872,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, type->name, type->name, type->name); if (ret < 0) { - vkd3d_string_buffer_cleanup(buf); + hlsl_release_string_buffer(ctx, buf); return false; } ret = vkd3d_string_buffer_printf(buf, 
body_template, type->name); if (ret < 0) { - vkd3d_string_buffer_cleanup(buf); + hlsl_release_string_buffer(ctx, buf); return false; } func = hlsl_compile_internal_function(ctx, atan2_mode ? atan2_name : atan_name, buf->buffer); - vkd3d_string_buffer_cleanup(buf); + hlsl_release_string_buffer(ctx, buf); if (!func) return false; @@ -2903,15 +2905,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, return write_atan_or_atan2(ctx, params, loc, true); } - -/* Find the type corresponding to the given source type, with the same - * dimensions but a different base type. */ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -} - static bool intrinsic_asfloat(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3035,6 +3028,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); } +static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *arg; + const char *fn_name, *type_name; + char *body; + + static const char template[] = + "%s %s(%s x)\n" + "{\n" + " return (exp(x) %s exp(-x)) / 2;\n" + "}\n"; + static const char fn_name_sinh[] = "sinh"; + static const char fn_name_cosh[] = "cosh"; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + type_name = arg->data_type->name; + fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type_name, fn_name, type_name, sinh_mode ? 
"-" : "+"))) + return false; + + func = hlsl_compile_internal_function(ctx, fn_name, body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + +static bool intrinsic_cosh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_cosh_or_sinh(ctx, params, loc, false); +} + static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3747,6 +3780,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); } +static bool intrinsic_refract(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *r_type = params->args[0]->data_type; + struct hlsl_type *n_type = params->args[1]->data_type; + struct hlsl_type *i_type = params->args[2]->data_type; + struct hlsl_type *res_type, *idx_type, *scal_type; + struct parse_initializer mut_params; + struct hlsl_ir_function_decl *func; + enum hlsl_base_type base; + char *body; + + static const char template[] = + "%s refract(%s r, %s n, %s i)\n" + "{\n" + " %s d, t;\n" + " d = dot(r, n);\n" + " t = 1 - i.x * i.x * (1 - d * d);\n" + " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" + "}"; + + if (r_type->class == HLSL_CLASS_MATRIX + || n_type->class == HLSL_CLASS_MATRIX + || i_type->class == HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); + return false; + } + + assert(params->args_count == 3); + mut_params = *params; + mut_params.args_count = 2; + if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) + return false; + + base = expr_common_base_type(res_type->base_type, i_type->base_type); + base = base == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; + res_type = convert_numeric_type(ctx, res_type, base); + idx_type = convert_numeric_type(ctx, i_type, base); + scal_type = hlsl_get_scalar_type(ctx, base); + + if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, + res_type->name, idx_type->name, scal_type->name))) + return false; + + func = hlsl_compile_internal_function(ctx, "refract", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_round(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3827,6 +3913,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } +static bool intrinsic_sinh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return write_cosh_or_sinh(ctx, params, loc, true); +} + /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) @@ -3899,6 +3991,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc); } +static bool intrinsic_tanh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *arg; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s tanh(%s x)\n" + "{\n" + " %s exp_pos, exp_neg;\n" + " exp_pos = exp(x);\n" + " exp_neg = exp(-x);\n" + " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" + "}\n"; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + type = arg->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + 
type->name, type->name, type->name))) + return false; + + func = hlsl_compile_internal_function(ctx, "tanh", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { @@ -3967,7 +4092,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; } - if (shader_profile_version_ge(ctx, 4, 0)) + if (hlsl_version_ge(ctx, 4, 0)) { unsigned int count = hlsl_sampler_dim_count(dim); struct hlsl_ir_node *divisor; @@ -4014,7 +4139,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; initialize_var_components(ctx, params->instrs, var, &idx, coords); - if (shader_profile_version_ge(ctx, 4, 0)) + if (hlsl_version_ge(ctx, 4, 0)) { if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) return false; @@ -4200,7 +4325,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) return false; - if (shader_profile_version_ge(ctx, 4, 0)) + if (hlsl_version_ge(ctx, 4, 0)) return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); return true; @@ -4231,6 +4356,7 @@ intrinsic_functions[] = {"clamp", 3, true, intrinsic_clamp}, {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, + {"cosh", 1, true, intrinsic_cosh}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, @@ -4262,15 +4388,18 @@ intrinsic_functions[] = {"pow", 2, true, intrinsic_pow}, {"radians", 1, true, intrinsic_radians}, {"reflect", 2, true, intrinsic_reflect}, + {"refract", 3, true, intrinsic_refract}, {"round", 1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, 
{"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, + {"sinh", 1, true, intrinsic_sinh}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, {"tan", 1, true, intrinsic_tan}, + {"tanh", 1, true, intrinsic_tanh}, {"tex1D", -1, false, intrinsic_tex1D}, {"tex2D", -1, false, intrinsic_tex2D}, {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, @@ -4405,26 +4534,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; } - else if (common_type->dimx == 1 && common_type->dimy == 1) - { - common_type = hlsl_get_numeric_type(ctx, cond_type->class, - common_type->base_type, cond_type->dimx, cond_type->dimy); - } - else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) + else { - /* This condition looks wrong but is correct. - * floatN is compatible with float1xN, but not with floatNx1. 
*/ - - struct vkd3d_string_buffer *cond_string, *value_string; + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, + cond_type->dimx, cond_type->dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; - cond_string = hlsl_type_to_string(ctx, cond_type); - value_string = hlsl_type_to_string(ctx, common_type); - if (cond_string && value_string) - hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Ternary condition type '%s' is not compatible with value type '%s'.", - cond_string->buffer, value_string->buffer); - hlsl_release_string_buffer(ctx, cond_string); - hlsl_release_string_buffer(ctx, value_string); + if (common_type->dimx == 1 && common_type->dimy == 1) + { + common_type = hlsl_get_numeric_type(ctx, cond_type->class, + common_type->base_type, cond_type->dimx, cond_type->dimy); + } + else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) + { + /* This condition looks wrong but is correct. + * floatN is compatible with float1xN, but not with floatNx1. 
*/ + + struct vkd3d_string_buffer *cond_string, *value_string; + + cond_string = hlsl_type_to_string(ctx, cond_type); + value_string = hlsl_type_to_string(ctx, common_type); + if (cond_string && value_string) + hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Ternary condition type '%s' is not compatible with value type '%s'.", + cond_string->buffer, value_string->buffer); + hlsl_release_string_buffer(ctx, cond_string); + hlsl_release_string_buffer(ctx, value_string); + } } if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) @@ -4449,9 +4586,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, second_string); } + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, + cond_type->dimx, cond_type->dimy); + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + common_type = first->data_type; } + assert(cond->data_type->base_type == HLSL_TYPE_BOOL); + args[0] = cond; args[1] = first; args[2] = second; @@ -5280,6 +5424,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, hlsl_release_string_buffer(ctx, string); } +static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) +{ + if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, + sizeof(*state_block->entries))) + return false; + + state_block->entries[state_block->count++] = entry; + return true; +} + } %locations @@ -5320,6 +5474,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; struct hlsl_scope *scope; + struct hlsl_state_block *state_block; + struct state_block_index state_block_index; } %token KW_BLENDSTATE @@ -5519,6 +5675,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum 
hlsl_sampler_dim dim, %type any_identifier %type var_identifier +%type stateblock_lhs_identifier %type name_opt %type parameter @@ -5533,6 +5690,10 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type semantic +%type state_block + +%type state_block_index_opt + %type switch_case %type field_type @@ -5543,6 +5704,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type type_no_void %type typedef_type +%type state_block_list %type type_spec %type variable_decl %type variable_def @@ -5573,9 +5735,9 @@ name_opt: | any_identifier pass: - KW_PASS name_opt annotations_opt '{' '}' + KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' { - if (!add_pass(ctx, $2, $3, &@1)) + if (!add_pass(ctx, $2, $3, $6, &@1)) YYABORT; } @@ -6474,7 +6636,7 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3); - if (shader_profile_version_lt(ctx, 4, 1)) + if (hlsl_version_lt(ctx, 4, 1)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); @@ -6513,7 +6675,7 @@ type_no_void: $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); if ($$->is_minimum_precision) { - if (shader_profile_version_lt(ctx, 4, 0)) + if (hlsl_version_lt(ctx, 4, 0)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Target profile doesn't support minimum-precision types."); @@ -6677,22 +6839,91 @@ variable_decl: $$->reg_reservation = $3.reg_reservation; } -state: - any_identifier '=' expr ';' +state_block_start: + %empty { - vkd3d_free($1); - destroy_block($3); + ctx->in_state_block = 1; } -state_block_start: +stateblock_lhs_identifier: + any_identifier + { + $$ = $1; + } + | KW_PIXELSHADER + { + if (!($$ = hlsl_strdup(ctx, "pixelshader"))) + YYABORT; + } + | KW_VERTEXSHADER + { + if (!($$ = hlsl_strdup(ctx, "vertexshader"))) + YYABORT; + } + +state_block_index_opt: %empty { - ctx->in_state_block = 1; + $$.has_index 
= false; + $$.index = 0; } + | '[' C_INTEGER ']' + { + if ($2 < 0) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, + "State block array index is not a positive integer constant."); + YYABORT; + } + $$.has_index = true; + $$.index = $2; + } state_block: %empty - | state_block state + { + if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) + YYABORT; + } + | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' + { + struct hlsl_state_block_entry *entry; + + if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) + YYABORT; + + entry->name = $2; + entry->lhs_has_index = $3.has_index; + entry->lhs_index = $3.index; + + entry->instrs = $5.instrs; + entry->args = $5.args; + entry->args_count = $5.args_count; + + $$ = $1; + state_block_add_entry($$, entry); + } + +state_block_list: + '{' state_block '}' + { + if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) + YYABORT; + + if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, + $$->state_block_count + 1, sizeof(*$$->state_blocks)))) + YYABORT; + $$->state_blocks[$$->state_block_count++] = $2; + } + | state_block_list ',' '{' state_block '}' + { + $$ = $1; + + if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, + $$->state_block_count + 1, sizeof(*$$->state_blocks)))) + YYABORT; + $$->state_blocks[$$->state_block_count++] = $4; + } variable_def: variable_decl @@ -6705,6 +6936,24 @@ variable_def: { $$ = $1; ctx->in_state_block = 0; + + if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, + $$->state_block_count + 1, sizeof(*$$->state_blocks)))) + YYABORT; + $$->state_blocks[$$->state_block_count++] = $4; + } + | variable_decl '{' state_block_start state_block_list '}' + { + $$ = $1; + ctx->in_state_block = 0; + + $$->state_blocks = $4->state_blocks; + $$->state_block_count = $4->state_block_count; + $$->state_block_capacity = $4->state_block_capacity; + $4->state_blocks = NULL; + $4->state_block_count = 0; + 
$4->state_block_capacity = 0; + free_parse_variable_def($4); } variable_def_typed: @@ -7330,15 +7579,13 @@ primary_expr: { if (ctx->in_state_block) { - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; + struct hlsl_ir_node *constant; - if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", - hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) + if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1))) YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; - if (!($$ = make_block(ctx, &load->node))) + vkd3d_free($1); + + if (!($$ = make_block(ctx, constant))) YYABORT; } else diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 6f2de93767b..eaa72836d8a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -1565,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); - if (instr->data_type->class != HLSL_CLASS_OBJECT) + if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) { struct hlsl_ir_node *swizzle_node; @@ -1742,7 +1742,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask; - if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) + if (!hlsl_is_numeric_type(store->rhs.node->data_type)) writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); } @@ -2954,12 +2954,11 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st return true; } -/* Use movc/cmp for the ternary operator. */ +/* Lower TERNARY to CMP for SM1. 
*/ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; - struct hlsl_ir_node *zero, *cond, *first, *second; - struct hlsl_constant_value zero_value = { 0 }; + struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; struct hlsl_ir_expr *expr; struct hlsl_type *type; @@ -2980,48 +2979,25 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return false; } - if (ctx->profile->major_version < 4) - { - struct hlsl_ir_node *abs, *neg; + assert(cond->data_type->base_type == HLSL_TYPE_BOOL); - if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) - return false; - hlsl_block_add_instr(block, abs); + type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, + instr->data_type->dimx, instr->data_type->dimy); - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) - return false; - hlsl_block_add_instr(block, neg); - - operands[0] = neg; - operands[1] = second; - operands[2] = first; - if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) - return false; - } - else - { - if (cond->data_type->base_type == HLSL_TYPE_FLOAT) - { - if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) - return false; - hlsl_block_add_instr(block, zero); + if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) + return false; + hlsl_block_add_instr(block, float_cond); - operands[0] = zero; - operands[1] = cond; - type = cond->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); - if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) - return false; - hlsl_block_add_instr(block, cond); - } + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); - memset(operands, 
0, sizeof(operands)); - operands[0] = cond; - operands[1] = first; - operands[2] = second; - if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) - return false; - } + memset(operands, 0, sizeof(operands)); + operands[0] = neg; + operands[1] = second; + operands[2] = first; + if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) + return false; hlsl_block_add_instr(block, replacement); return true; @@ -3319,11 +3295,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { + struct hlsl_type *cond_type = condition->data_type; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; struct hlsl_ir_node *cond; assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + if (cond_type->base_type != HLSL_TYPE_BOOL) + { + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); + + if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) + return NULL; + hlsl_block_add_instr(instrs, condition); + } + operands[0] = condition; operands[1] = if_true; operands[2] = if_false; @@ -3760,6 +3746,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: break; + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. 
*/ + vkd3d_unreachable(); } return false; @@ -3848,6 +3837,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) } } +static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read) +{ + unsigned int i; + + if (hlsl_deref_is_lowered(deref)) + { + if (deref->rel_offset.node) + deref->rel_offset.node->last_read = last_read; + } + else + { + for (i = 0; i < deref->path_len; ++i) + deref->path[i].node->last_read = last_read; + } +} + /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. We also do this for nodes, so that @@ -3867,6 +3872,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CALL: /* We should have inlined all calls before computing liveness. */ vkd3d_unreachable(); + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); case HLSL_IR_STORE: { @@ -3876,8 +3884,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop if (!var->first_write) var->first_write = loop_first ? 
min(instr->index, loop_first) : instr->index; store->rhs.node->last_read = last_read; - if (store->lhs.rel_offset.node) - store->lhs.rel_offset.node->last_read = last_read; + deref_mark_last_read(&store->lhs, last_read); break; } case HLSL_IR_EXPR: @@ -3904,8 +3911,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = load->src.var; var->last_read = max(var->last_read, last_read); - if (load->src.rel_offset.node) - load->src.rel_offset.node->last_read = last_read; + deref_mark_last_read(&load->src, last_read); break; } case HLSL_IR_LOOP: @@ -3922,14 +3928,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = load->resource.var; var->last_read = max(var->last_read, last_read); - if (load->resource.rel_offset.node) - load->resource.rel_offset.node->last_read = last_read; + deref_mark_last_read(&load->resource, last_read); if ((var = load->sampler.var)) { var->last_read = max(var->last_read, last_read); - if (load->sampler.rel_offset.node) - load->sampler.rel_offset.node->last_read = last_read; + deref_mark_last_read(&load->sampler, last_read); } if (load->coords.node) @@ -3954,8 +3958,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->resource.var; var->last_read = max(var->last_read, last_read); - if (store->resource.rel_offset.node) - store->resource.rel_offset.node->last_read = last_read; + deref_mark_last_read(&store->resource, last_read); store->coords.node->last_read = last_read; store->value.node->last_read = last_read; break; @@ -4790,7 +4793,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) continue; if (var1->reg_reservation.offset_type - || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) + || var1->reg_reservation.reg_type == 's' + || var1->reg_reservation.reg_type == 't' + || var1->reg_reservation.reg_type == 'u') buffer->manually_packed_elements = true; else 
buffer->automatically_packed_elements = true; @@ -5400,11 +5405,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); lower_ir(ctx, lower_narrowing_casts, body); - lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); lower_ir(ctx, lower_int_division, body); lower_ir(ctx, lower_int_modulus, body); lower_ir(ctx, lower_int_abs, body); + lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_float_modulus, body); hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do @@ -5420,13 +5425,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); - if (profile-> major_version < 4) - { - lower_ir(ctx, lower_nonfloat_exprs, body); - /* Constants casted to float must be folded. */ - hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - } - lower_ir(ctx, lower_nonconstant_vector_derefs, body); lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); @@ -5438,9 +5436,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); sort_synthetic_separated_samplers_first(ctx); - lower_ir(ctx, lower_ternary, body); if (profile->major_version < 4) { + lower_ir(ctx, lower_ternary, body); + + lower_ir(ctx, lower_nonfloat_exprs, body); + /* Constants casted to float must be folded, and new casts to bool also need to be lowered. 
*/ + hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_casts_to_int, body); lower_ir(ctx, lower_division, body); lower_ir(ctx, lower_sqrt, body); @@ -5463,6 +5467,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); + do + compute_liveness(ctx, entry_func); + while (hlsl_transform_ir(ctx, dce, body, NULL)); + /* TODO: move forward, remove when no longer needed */ transform_derefs(ctx, replace_deref_path_with_offset, body); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index b76b1fce507..4cea98e9286 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, assert(dst_type->base_type == src2->node.data_type->base_type); assert(dst_type->base_type == src3->node.data_type->base_type); + assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL); for (k = 0; k < dst_type->dimx; ++k) - { - switch (src1->node.data_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k]; - break; - - case HLSL_TYPE_DOUBLE: - dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k]; - break; + dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; - break; - - default: - vkd3d_unreachable(); - } - } return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 8af537390f9..610d907d981 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -17,6 +17,7 @@ */ #include "vkd3d_shader_private.h" +#include "vkd3d_types.h" bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { @@ -56,19 +57,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i vsir_instruction_init(ins, &location, VKD3DSIH_NOP); } -static void remove_dcl_temps(struct vsir_program *program) -{ - unsigned int i; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) - vkd3d_shader_instruction_make_nop(ins); - } -} - static bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) @@ -94,85 +82,163 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } -static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) +static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, + struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) { + const unsigned int components_read = 3 + (program->shader_version.major >= 2); struct vkd3d_shader_instruction_array *instructions = &program->instructions; - struct vkd3d_shader_instruction *texkill_ins, *ins; - unsigned int components_read = 3 + (program->shader_version.major >= 2); - unsigned int tmp_idx = ~0u; - unsigned int i, k; - - for (i = 0; i < instructions->count; ++i) - { - texkill_ins = 
&instructions->elements[i]; + size_t pos = texkill - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int j; - if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) - continue; + if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; - if (tmp_idx == ~0u) - tmp_idx = program->temp_count++; + /* tmp = ins->dst[0] < 0 */ - /* tmp = ins->dst[0] < 0 */ + ins = &instructions->elements[pos + 1]; + if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; - ins = &instructions->elements[i + 1]; - if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + + ins->src[0].reg = texkill->dst[0].reg; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].reg.u.immconst_f32[0] = 0.0f; + ins->src[1].reg.u.immconst_f32[1] = 0.0f; + ins->src[1].reg.u.immconst_f32[2] = 0.0f; + ins->src[1].reg.u.immconst_f32[3] = 0.0f; + + /* tmp.x = tmp.x || tmp.y */ + /* tmp.x = tmp.x || tmp.z */ + /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ + + for (j = 1; j < components_read; ++j) + { + ins = &instructions->elements[pos + 1 + j]; + if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) return VKD3D_ERROR_OUT_OF_MEMORY; vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ins->dst[0].reg.dimension = 
VSIR_DIMENSION_VEC4; - ins->dst[0].reg.idx[0].offset = tmp_idx; - ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - ins->src[0].reg = texkill_ins->dst[0].reg; - vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = *tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].reg.u.immconst_f32[0] = 0.0f; - ins->src[1].reg.u.immconst_f32[1] = 0.0f; - ins->src[1].reg.u.immconst_f32[2] = 0.0f; - ins->src[1].reg.u.immconst_f32[3] = 0.0f; + ins->src[1].reg.idx[0].offset = *tmp_idx; + ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); + } - /* tmp.x = tmp.x || tmp.y */ - /* tmp.x = tmp.x || tmp.z */ - /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ + /* discard_nz tmp.x */ - for (k = 1; k < components_read; ++k) - { - ins = &instructions->elements[i + 1 + k]; - if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) - return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &instructions->elements[pos + 1 + components_read]; + if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->dst[0].reg.idx[0].offset = tmp_idx; - ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[0].reg.idx[0].offset = tmp_idx; - ins->src[0].swizzle = 
VKD3D_SHADER_SWIZZLE(X, X, X, X); - vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].reg.idx[0].offset = tmp_idx; - ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); - } + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = *tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - /* discard_nz tmp.x */ + /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(texkill); - ins = &instructions->elements[i + 1 + components_read]; - if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) - return VKD3D_ERROR_OUT_OF_MEMORY; - ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + return VKD3D_OK; +} - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[0].reg.idx[0].offset = tmp_idx; - ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +/* The Shader Model 5 Assembly documentation states: "If components of a mad + * instruction are tagged as precise, the hardware must execute a mad instruction + * or the exact equivalent, and it cannot split it into a multiply followed by an add." + * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is + * not fused for "precise" operations." + * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. 
*/ +static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, + struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *mul_ins, *add_ins; + size_t pos = mad - instructions->elements; + struct vkd3d_shader_dst_param *mul_dst; + + if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) + return VKD3D_OK; + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; + + mul_ins = &instructions->elements[pos]; + add_ins = &instructions->elements[pos + 1]; + + mul_ins->handler_idx = VKD3DSIH_MUL; + mul_ins->src_count = 2; + + if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; + + mul_dst = mul_ins->dst; + *add_ins->dst = *mul_dst; + + mul_dst->modifiers = 0; + vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); + mul_dst->reg.dimension = add_ins->dst->reg.dimension; + mul_dst->reg.idx[0].offset = *tmp_idx; + + add_ins->src[0].reg = mul_dst->reg; + add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); + add_ins->src[0].modifiers = 0; + add_ins->src[1] = mul_ins->src[2]; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + unsigned int tmp_idx = ~0u, i; + enum vkd3d_result ret; + + for (i = 0; i < instructions->count; ++i) + { + struct vkd3d_shader_instruction *ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_TEXKILL: + if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) + return ret; + break; - /* Make the original instruction no-op */ - 
vkd3d_shader_instruction_make_nop(texkill_ins); + case VKD3DSIH_MAD: + if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) + return ret; + break; + + case VKD3DSIH_DCL_TEMPS: + vkd3d_shader_instruction_make_nop(ins); + break; + + default: + break; + } } return VKD3D_OK; @@ -2577,97 +2643,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) } } - /* Second subpass: creating new blocks might have broken - * references in PHI instructions, so we use the block map to fix - * them. */ - current_label = 0; - for (i = 0; i < ins_count; ++i) - { - struct vkd3d_shader_instruction *ins = &instructions[i]; - struct vkd3d_shader_src_param *new_src; - unsigned int j, l, new_src_count = 0; - - switch (ins->handler_idx) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - continue; - - case VKD3DSIH_PHI: - break; - - default: - continue; - } - - /* First count how many source parameters we need. */ - for (j = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - match_count += 1; - } - - new_src_count += (match_count != 0) ? 2 * match_count : 2; - } - - assert(new_src_count >= ins->src_count); - - /* Allocate more source parameters if needed. */ - if (new_src_count == ins->src_count) - { - new_src = ins->src; - } - else - { - if (!(new_src = vsir_program_get_src_params(program, new_src_count))) - { - ERR("Failed to allocate %u source parameters.\n", new_src_count); - goto fail; - } - } - - /* Then do the copy. 
*/ - for (j = 0, l = 0; j < ins->src_count; j += 2) - { - unsigned int source_label = label_from_src_param(&ins->src[j + 1]); - size_t k, match_count = 0; - - for (k = 0; k < map_count; ++k) - { - struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; - - if (mapping->switch_label == source_label && mapping->target_label == current_label) - { - match_count += 1; - - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - new_src[l + 1].reg.idx[0].offset = mapping->if_label; - l += 2; - } - } - - if (match_count == 0) - { - new_src[l] = ins->src[j]; - new_src[l + 1] = ins->src[j + 1]; - l += 2; - } - } - - assert(l == new_src_count); - - ins->src_count = new_src_count; - ins->src = new_src; - } - vkd3d_free(program->instructions.elements); vkd3d_free(block_map); program->instructions.elements = instructions; @@ -2685,148 +2660,139 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, - struct vkd3d_shader_src_param *src); +struct ssas_to_temps_alloc +{ + unsigned int *table; + unsigned int next_temp_idx; +}; -/* This is idempotent: it can be safely applied more than once on the - * same register. 
*/ -static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) +static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) { - unsigned int i; + size_t i = ssa_count * sizeof(*alloc->table); - if (reg->type == VKD3DSPR_SSA) + if (!(alloc->table = vkd3d_malloc(i))) { - reg->type = VKD3DSPR_TEMP; - reg->idx[0].offset += program->temp_count; + ERR("Failed to allocate SSA table.\n"); + return false; } + memset(alloc->table, 0xff, i); - for (i = 0; i < reg->idx_count; ++i) - if (reg->idx[i].rel_addr) - materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); -} - -static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, - struct vkd3d_shader_dst_param *dst) -{ - materialize_ssas_to_temps_process_reg(program, &dst->reg); -} - -static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, - struct vkd3d_shader_src_param *src) -{ - materialize_ssas_to_temps_process_reg(program, &src->reg); + alloc->next_temp_idx = temp_count; + return true; } -static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, - unsigned int label) +/* This is idempotent: it can be safely applied more than once on the + * same register. 
*/ +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, + struct vkd3d_shader_register *reg) { unsigned int i; - assert(ins->handler_idx == VKD3DSIH_PHI); - - for (i = 0; i < ins->src_count; i += 2) + if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) { - if (label_from_src_param(&ins->src[i + 1]) == label) - return &ins->src[i]; + reg->type = VKD3DSPR_TEMP; + reg->idx[0].offset = alloc->table[reg->idx[0].offset]; } - vkd3d_unreachable(); + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) + materialize_ssas_to_temps_process_reg(program, alloc, &reg->idx[i].rel_addr->reg); } -static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, - struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, - const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, - const struct vkd3d_shader_src_param *source, bool invert) +struct ssas_to_temps_block_info { - struct vkd3d_shader_src_param *src; - struct vkd3d_shader_dst_param *dst; - - if (!vsir_instruction_init_with_params(program, instruction, loc, - cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 
3 : 1)) - return false; - - dst = instruction->dst; - src = instruction->src; - - dst[0] = *dest; - materialize_ssas_to_temps_process_dst_param(program, &dst[0]); + struct phi_incoming_to_temp + { + struct vkd3d_shader_src_param *src; + struct vkd3d_shader_dst_param *dst; + } *incomings; + size_t incoming_capacity; + size_t incoming_count; +}; - assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); - assert(dst[0].modifiers == 0); - assert(dst[0].shift == 0); +static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, + size_t count) +{ + size_t i; - if (cond) - { - src[0] = *cond; - src[1 + invert] = *source; - memset(&src[2 - invert], 0, sizeof(src[2 - invert])); - src[2 - invert].reg = dst[0].reg; - materialize_ssas_to_temps_process_src_param(program, &src[1]); - materialize_ssas_to_temps_process_src_param(program, &src[2]); - } - else - { - src[0] = *source; - materialize_ssas_to_temps_process_src_param(program, &src[0]); - } + for (i = 0; i < count; ++i) + vkd3d_free(block_info[i].incomings); - return true; + vkd3d_free(block_info); } -static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) +static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) { + size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; + struct ssas_to_temps_block_info *info, *block_info = NULL; struct vkd3d_shader_instruction *instructions = NULL; - struct materialize_ssas_to_temps_block_data - { - size_t phi_begin; - size_t phi_count; - } *block_index = NULL; - size_t ins_capacity = 0, ins_count = 0, i; + struct ssas_to_temps_alloc alloc = {0}; unsigned int current_label = 0; - if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; - - if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) + if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { - ERR("Failed to allocate block 
index.\n"); + ERR("Failed to allocate block info array.\n"); goto fail; } - for (i = 0; i < program->instructions.count; ++i) + if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) + goto fail; + + for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + unsigned int j, temp_idx; - switch (ins->handler_idx) + /* Only phi src/dst SSA values need be converted here. Structurisation may + * introduce new cases of undominated SSA use, which will be handled later. */ + if (ins->handler_idx != VKD3DSIH_PHI) + continue; + ++phi_count; + + temp_idx = alloc.next_temp_idx++; + + for (j = 0; j < ins->src_count; j += 2) { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); - break; + struct phi_incoming_to_temp *incoming; + unsigned int label; - case VKD3DSIH_PHI: - assert(current_label != 0); - assert(i != 0); - if (block_index[current_label - 1].phi_begin == 0) - block_index[current_label - 1].phi_begin = i; - block_index[current_label - 1].phi_count += 1; - break; + label = label_from_src_param(&ins->src[j + 1]); + assert(label); - default: - current_label = 0; - break; + info = &block_info[label - 1]; + + if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, + sizeof(*info->incomings)))) + goto fail; + + incoming = &info->incomings[info->incoming_count++]; + incoming->src = &ins->src[j]; + incoming->dst = ins->dst; + + alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; + + ++incoming_count; } + + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); } + if (!phi_count) + goto done; + + if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) + goto fail; + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct 
vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; size_t j; for (j = 0; j < ins->dst_count; ++j) - materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); switch (ins->handler_idx) { @@ -2835,65 +2801,21 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog break; case VKD3DSIH_BRANCH: - { - if (vsir_register_is_label(&ins->src[0].reg)) - { - const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) - goto fail; - - for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) - goto fail; + case VKD3DSIH_SWITCH_MONOLITHIC: + info = &block_info[current_label - 1]; - ++ins_count; - } - } - else + for (j = 0; j < info->incoming_count; ++j) { - struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], - *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; - const struct vkd3d_shader_src_param *cond = &ins->src[0]; + struct phi_incoming_to_temp *incoming = &info->incomings[j]; - if (!reserve_instructions(&instructions, &ins_capacity, - ins_count + data_true->phi_count + data_false->phi_count)) + mov_ins = &instructions[ins_count++]; + if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, 
VKD3DSIH_MOV, 1, 0)) goto fail; - - for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) - goto fail; - - ++ins_count; - } - - for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) - { - const struct vkd3d_shader_src_param *source; - - source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], - current_label); - if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], - &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) - goto fail; - - ++ins_count; - } + *mov_ins->dst = *incoming->dst; + mov_ins->src = incoming->src; + mov_ins->src_count = 1; } break; - } case VKD3DSIH_PHI: continue; @@ -2902,25 +2824,24 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog break; } - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - instructions[ins_count++] = *ins; } vkd3d_free(program->instructions.elements); - vkd3d_free(block_index); program->instructions.elements = instructions; program->instructions.capacity = ins_capacity; program->instructions.count = ins_count; - program->temp_count += program->ssa_count; - program->ssa_count = 0; + program->temp_count = alloc.next_temp_idx; +done: + ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(alloc.table); return VKD3D_OK; fail: vkd3d_free(instructions); - vkd3d_free(block_index); + ssas_to_temps_block_info_cleanup(block_info, program->block_count); + vkd3d_free(alloc.table); return VKD3D_ERROR_OUT_OF_MEMORY; } @@ -3061,19 +2982,19 @@ struct vsir_cfg_structure union { 
struct vsir_block *block; - struct + struct vsir_cfg_structure_loop { struct vsir_cfg_structure_list body; unsigned idx; } loop; - struct + struct vsir_cfg_structure_selection { struct vkd3d_shader_src_param *condition; struct vsir_cfg_structure_list if_body; struct vsir_cfg_structure_list else_body; bool invert_condition; } selection; - struct + struct vsir_cfg_structure_jump { enum vsir_cfg_jump_type { @@ -3157,6 +3078,14 @@ static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) } } +struct vsir_cfg_emit_target +{ + struct vkd3d_shader_instruction *instructions; + size_t ins_capacity, ins_count; + unsigned int jump_target_temp_idx; + unsigned int temp_count; +}; + struct vsir_cfg { struct vkd3d_shader_message_context *message_context; @@ -3200,15 +3129,15 @@ struct vsir_cfg * block), but we still try to keep `begin' as forward as * possible, to keep the loop scope as small as possible. */ bool synthetic; + /* The number of jump instructions (both conditional and + * unconditional) that target this loop. 
*/ + unsigned int target_count; } *loop_intervals; size_t loop_interval_count, loop_interval_capacity; struct vsir_cfg_structure_list structured_program; - struct vkd3d_shader_instruction *instructions; - size_t ins_capacity, ins_count; - unsigned int jump_target_temp_idx; - unsigned int temp_count; + struct vsir_cfg_emit_target *target; }; static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3248,6 +3177,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign interval->begin = begin; interval->end = end; interval->synthetic = synthetic; + interval->target_count = 0; return VKD3D_OK; } @@ -3402,7 +3332,7 @@ static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) } static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) { struct vsir_block *current_block = NULL; enum vkd3d_result ret; @@ -3412,6 +3342,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program cfg->message_context = message_context; cfg->program = program; cfg->block_count = program->block_count; + cfg->target = target; vsir_block_list_init(&cfg->order); @@ -4250,53 +4181,157 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target) +static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int target) { struct vsir_cfg_structure *last = &list->structures[list->count - 1]; if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE && !last->u.jump.condition && last->u.jump.target == target) + { --list->count; + assert(cfg->loop_intervals[target].target_count > 0); + --cfg->loop_intervals[target].target_count; + } } -static enum vkd3d_result vsir_cfg_synthesize_selections(struct 
vsir_cfg_structure_list *list) +static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list) { - enum vkd3d_result ret; - size_t i; + struct vsir_cfg_structure *structure; + size_t count = list->count; - for (i = 0; i < list->count; ++i) - { - struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; + if (count == 0) + return NULL; - if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) - continue; + structure = &list->structures[count - 1]; - vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); - new_selection.u.selection.condition = structure->u.jump.condition; - new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; + if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK + || structure->u.jump.condition) + return NULL; - if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, - STRUCTURE_TYPE_JUMP))) - return VKD3D_ERROR_OUT_OF_MEMORY; - new_jump->u.jump.type = structure->u.jump.type; - new_jump->u.jump.target = structure->u.jump.target; + return structure; +} - /* Move the rest of the structure list in the else branch - * rather than leaving it after the selection construct. The - * reason is that this is more conducive to further - * optimization, because all the conditional `break's appear - * as the last instruction of a branch of a cascade of - * selection constructs at the end of the structure list we're - * processing, instead of being buried in the middle of the - * structure list itself. */ - if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, - &list->structures[i + 1], list->count - i - 1)) < 0) - return ret; +/* When the last instruction in both branches of a selection construct + * is an unconditional break, any of them can be moved after the + * selection construct. 
If they break the same loop both of them can + * be moved out, otherwise we can choose which one: we choose the one + * that breaks the innermost loop, because we hope to eventually + * remove the loop itself. + * + * In principle a similar movement could be done when the last + * instructions are continue and continue, or continue and break. But + * in practice I don't think those situations can happen given the + * previous passes we do on the program, so we don't care. */ +static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list) +{ + struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break; + unsigned int if_target, else_target, max_target; + size_t pos = list->count - 1; + + selection = &list->structures[pos]; + assert(selection->type == STRUCTURE_TYPE_SELECTION); + + if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); + else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); + + if (!if_break || !else_break) + return VKD3D_OK; + + if_target = if_break->u.jump.target; + else_target = else_break->u.jump.target; + max_target = max(if_target, else_target); + + if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP))) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_break->u.jump.type = JUMP_BREAK; + new_break->u.jump.target = max_target; + ++cfg->loop_intervals[max_target].target_count; + + /* Pointer `selection' could have been invalidated by the append + * operation. 
*/ + selection = &list->structures[pos]; + assert(selection->type == STRUCTURE_TYPE_SELECTION); + + if (if_target == max_target) + { + --selection->u.selection.if_body.count; + assert(cfg->loop_intervals[if_target].target_count > 0); + --cfg->loop_intervals[if_target].target_count; + } + + if (else_target == max_target) + { + --selection->u.selection.else_body.count; + assert(cfg->loop_intervals[else_target].target_count > 0); + --cfg->loop_intervals[else_target].target_count; + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list) +{ + struct vsir_cfg_structure *trailing; + + if (list->count == 0) + return VKD3D_OK; + + trailing = &list->structures[list->count - 1]; + + if (trailing->type != STRUCTURE_TYPE_SELECTION) + return VKD3D_OK; + + vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body); + vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body); + + return vsir_cfg_move_breaks_out_of_selections(cfg, list); +} + +static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list) +{ + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; + + if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) + continue; + + vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); + new_selection.u.selection.condition = structure->u.jump.condition; + new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; + + if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, + STRUCTURE_TYPE_JUMP))) + return VKD3D_ERROR_OUT_OF_MEMORY; + new_jump->u.jump.type = structure->u.jump.type; + new_jump->u.jump.target = structure->u.jump.target; + + /* Move the rest of the 
structure list in the else branch + * rather than leaving it after the selection construct. The + * reason is that this is more conducive to further + * optimization, because all the conditional `break's appear + * as the last instruction of a branch of a cascade of + * selection constructs at the end of the structure list we're + * processing, instead of being buried in the middle of the + * structure list itself. */ + if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, + &list->structures[i + 1], list->count - i - 1)) < 0) + return ret; *structure = new_selection; list->count = i + 1; - if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0) + if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0) + return ret; + + if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0) return ret; break; @@ -4305,40 +4340,164 @@ static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structur return VKD3D_OK; } +static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop) +{ + struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body; + unsigned int target, loop_idx = loop->u.loop.idx; + struct vsir_cfg_structure *trailing_break; + enum vkd3d_result ret; + + trailing_break = vsir_cfg_get_trailing_break(loop_body); + + /* If the loop's last instruction is not a break, we cannot remove + * the loop itself. */ + if (!trailing_break) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + return ret; + memset(loop, 0, sizeof(*loop)); + return VKD3D_OK; + } + + target = trailing_break->u.jump.target; + assert(cfg->loop_intervals[target].target_count > 0); + + /* If the loop is not targeted by any jump, we can remove it. The + * trailing `break' then targets another loop, so we have to keep + * it. 
*/ + if (cfg->loop_intervals[loop_idx].target_count == 0) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, + &loop_body->structures[0], loop_body->count)) < 0) + return ret; + loop_body->count = 0; + return VKD3D_OK; + } + + /* If the loop is targeted only by its own trailing `break' + * instruction, then we can remove it together with the `break' + * itself. */ + if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1) + { + --cfg->loop_intervals[loop_idx].target_count; + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, + &loop_body->structures[0], loop_body->count - 1)) < 0) + return ret; + loop_body->count = 0; + return VKD3D_OK; + } + + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + return ret; + memset(loop, 0, sizeof(*loop)); + + return VKD3D_OK; +} + static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) { + struct vsir_cfg_structure_list old_list = *list, *new_list = list; enum vkd3d_result ret; size_t i; - for (i = 0; i < list->count; ++i) + memset(new_list, 0, sizeof(*new_list)); + + for (i = 0; i < old_list.count; ++i) { - struct vsir_cfg_structure *loop = &list->structures[i]; + struct vsir_cfg_structure *loop = &old_list.structures[i], *selection; struct vsir_cfg_structure_list *loop_body; if (loop->type != STRUCTURE_TYPE_LOOP) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + goto out; + memset(loop, 0, sizeof(*loop)); continue; + } loop_body = &loop->u.loop.body; if (loop_body->count == 0) + { + if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) + goto out; + memset(loop, 0, sizeof(*loop)); continue; + } - vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx); + vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx); if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) - return ret; + goto out; - if ((ret = 
vsir_cfg_synthesize_selections(loop_body)) < 0) - return ret; + if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0) + goto out; + + if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0) + goto out; + + /* If the last pushed instruction is a selection and one of the branches terminates with a + * `break', start pushing to the other branch, in the hope of eventually push a `break' + * there too and be able to remove a loop. */ + if (new_list->count == 0) + continue; + + selection = &new_list->structures[new_list->count - 1]; + + if (selection->type == STRUCTURE_TYPE_SELECTION) + { + if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body)) + new_list = &selection->u.selection.else_body; + else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body)) + new_list = &selection->u.selection.if_body; + } } - return VKD3D_OK; + ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list); + +out: + vsir_cfg_structure_list_cleanup(&old_list); + + return ret; +} + +static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + break; + + case STRUCTURE_TYPE_LOOP: + vsir_cfg_count_targets(cfg, &structure->u.loop.body); + break; + + case STRUCTURE_TYPE_SELECTION: + vsir_cfg_count_targets(cfg, &structure->u.selection.if_body); + vsir_cfg_count_targets(cfg, &structure->u.selection.else_body); + break; + + case STRUCTURE_TYPE_JUMP: + if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE) + ++cfg->loop_intervals[structure->u.jump.target].target_count; + break; + } + } } static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) { enum vkd3d_result ret; + vsir_cfg_count_targets(cfg, &cfg->structured_program); + ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); if (TRACE_ON()) @@ 
-4348,199 +4507,244 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) } static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, - struct vsir_cfg_structure_list *list, unsigned int loop_idx) + struct vsir_cfg_structure_list *list, unsigned int loop_idx); + +static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg, + struct vsir_block *block) +{ + struct vsir_cfg_emit_target *target = cfg->target; + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, + target->ins_count + (block->end - block->begin))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&target->instructions[target->ins_count], block->begin, + (char *)block->end - (char *)block->begin); + + target->ins_count += block->end - block->begin; + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, + struct vsir_cfg_structure_loop *loop, unsigned int loop_idx) { + struct vsir_cfg_emit_target *target = cfg->target; const struct vkd3d_shader_location no_loc = {0}; enum vkd3d_result ret; - size_t i; - for (i = 0; i < list->count; ++i) + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) + return ret; + + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + + /* Add a trampoline to implement multilevel jumping depending on the stored + * jump_target value. 
*/ + if (loop_idx != UINT_MAX) { - struct vsir_cfg_structure *structure = &list->structures[i]; + /* If the multilevel jump is a `continue' and the target is the loop we're inside + * right now, then we can finally do the `continue'. */ + const unsigned int outer_continue_target = loop_idx << 1 | 1; + /* If the multilevel jump is a `continue' to any other target, or if it is a `break' + * and the target is not the loop we just finished emitting, then it means that + * we have to reach an outer loop, so we keep breaking. */ + const unsigned int inner_break_target = loop->idx << 1; + + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; - switch (structure->type) - { - case STRUCTURE_TYPE_BLOCK: - { - struct vsir_block *block = structure->u.block; + dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); + src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); + src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) - return VKD3D_ERROR_OUT_OF_MEMORY; + ++target->ins_count; - memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - cfg->ins_count += block->end - block->begin; - break; - } + src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); - case STRUCTURE_TYPE_LOOP: - { - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + ++target->ins_count; + ++target->temp_count; - 
vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; - if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) - return ret; + dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); + src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); + src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) - return VKD3D_ERROR_OUT_OF_MEMORY; + ++target->ins_count; - vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_BREAKP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - /* Add a trampoline to implement multilevel jumping depending on the stored - * jump_target value. */ - if (loop_idx != UINT_MAX) - { - /* If the multilevel jump is a `continue' and the target is the loop we're inside - * right now, then we can finally do the `continue'. */ - const unsigned int outer_continue_target = loop_idx << 1 | 1; - /* If the multilevel jump is a `continue' to any other target, or if it is a `break' - * and the target is not the loop we just finished emitting, then it means that - * we have to reach an outer loop, so we keep breaking. 
*/ - const unsigned int inner_break_target = structure->u.loop.idx << 1; + src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], - &no_loc, VKD3DSIH_IEQ, 1, 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; + ++target->ins_count; + ++target->temp_count; + } - dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); - src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); - src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); + return VKD3D_OK; +} - ++cfg->ins_count; +static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg, + struct vsir_cfg_structure_selection *selection, unsigned int loop_idx) +{ + struct vsir_cfg_emit_target *target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], - &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_IF, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - ++cfg->ins_count; - ++cfg->temp_count; + target->instructions[target->ins_count].src[0] = *selection->condition; - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], - &no_loc, VKD3DSIH_IEQ, 1, 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; + if (selection->invert_condition) + target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - 
dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); - src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); - src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); + ++target->ins_count; - ++cfg->ins_count; + if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) + return ret; - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], - &no_loc, VKD3DSIH_BREAKP, 0, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; - cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + if (selection->else_body.count != 0) + { + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE); - ++cfg->ins_count; - ++cfg->temp_count; - } + if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) + return ret; + } - break; - } + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - case STRUCTURE_TYPE_SELECTION: - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF); - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, - VKD3DSIH_IF, 0, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + return VKD3D_OK; +} - cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition; +static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, + struct vsir_cfg_structure_jump *jump, unsigned int loop_idx) +{ + struct vsir_cfg_emit_target 
*target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + /* Encode the jump target as the loop index plus a bit to remember whether + * we're breaking or continueing. */ + unsigned int jump_target = jump->target << 1; + enum vkd3d_shader_opcode opcode; - if (structure->u.selection.invert_condition) - cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + switch (jump->type) + { + case JUMP_CONTINUE: + /* If we're continueing the loop we're directly inside, then we can emit a + * `continue'. Otherwise we first have to break all the loops between here + * and the loop to continue, recording our intention to continue + * in the lowest bit of jump_target. */ + if (jump->target == loop_idx) + { + opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + } + jump_target |= 1; + /* fall through */ - ++cfg->ins_count; + case JUMP_BREAK: + opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; + break; - if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0) - return ret; + case JUMP_RET: + assert(!jump->condition); + opcode = VKD3DSIH_RET; + break; - if (structure->u.selection.else_body.count != 0) - { - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + default: + vkd3d_unreachable(); + } - vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE); + if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; - if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0) - return ret; - } + if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) + { + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, VKD3DSIH_MOV, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; - if (!reserve_instructions(&cfg->instructions, 
&cfg->ins_capacity, cfg->ins_count + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); + src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); - vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF); - break; + ++target->ins_count; + } - case STRUCTURE_TYPE_JUMP: - { - /* Encode the jump target as the loop index plus a bit to remember whether - * we're breaking or continueing. */ - unsigned int jump_target = structure->u.jump.target << 1; - enum vkd3d_shader_opcode opcode; + if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], + &no_loc, opcode, 0, !!jump->condition)) + return VKD3D_ERROR_OUT_OF_MEMORY; - switch (structure->u.jump.type) - { - case JUMP_CONTINUE: - /* If we're continueing the loop we're directly inside, then we can emit a - * `continue'. Otherwise we first have to break all the loops between here - * and the loop to continue, recording our intention to continue - * in the lowest bit of jump_target. */ - if (structure->u.jump.target == loop_idx) - { - opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; - break; - } - jump_target |= 1; - /* fall through */ - - case JUMP_BREAK: - opcode = structure->u.jump.condition ? 
VKD3DSIH_BREAKP : VKD3DSIH_BREAK; - break; - - case JUMP_RET: - assert(!structure->u.jump.condition); - opcode = VKD3DSIH_RET; - break; - - default: - vkd3d_unreachable(); - } + if (jump->invert_condition) + target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; + if (jump->condition) + target->instructions[target->ins_count].src[0] = *jump->condition; - if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) - { - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], - &no_loc, VKD3DSIH_MOV, 1, 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + ++target->ins_count; - dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); - src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); + return VKD3D_OK; +} - ++cfg->ins_count; - } +static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int loop_idx) +{ + enum vkd3d_result ret; + size_t i; - if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], - &no_loc, opcode, 0, !!structure->u.jump.condition)) - return VKD3D_ERROR_OUT_OF_MEMORY; + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; - if (structure->u.jump.invert_condition) - cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0) + return ret; + break; - if (structure->u.jump.condition) - cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; + case STRUCTURE_TYPE_LOOP: + if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0) + return ret; + break; - ++cfg->ins_count; + 
case STRUCTURE_TYPE_SELECTION: + if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection, + loop_idx)) < 0) + return ret; + break; + + case STRUCTURE_TYPE_JUMP: + if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, + loop_idx)) < 0) + return ret; break; - } default: vkd3d_unreachable(); @@ -4551,40 +4755,191 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, } static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) +{ + return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); +} + +static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) { enum vkd3d_result ret; - size_t i; + struct vsir_cfg cfg; + + if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0) + return ret; - cfg->jump_target_temp_idx = cfg->program->temp_count; - cfg->temp_count = cfg->program->temp_count + 1; + vsir_cfg_compute_dominators(&cfg); - if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) + if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) + goto out; + + if ((ret = vsir_cfg_optimize(&cfg)) < 0) + goto out; + + ret = vsir_cfg_emit_structured_program(&cfg); + +out: + vsir_cfg_cleanup(&cfg); + + return ret; +} + +static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + struct vsir_cfg_emit_target target = {0}; + enum vkd3d_result ret; + unsigned int i; + + target.jump_target_temp_idx = program->temp_count; + target.temp_count = program->temp_count + 1; + + if (!reserve_instructions(&target.instructions, 
&target.ins_capacity, program->instructions.count)) return VKD3D_ERROR_OUT_OF_MEMORY; /* Copy declarations until the first block. */ - for (i = 0; i < cfg->program->instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; if (ins->handler_idx == VKD3DSIH_LABEL) break; - cfg->instructions[cfg->ins_count++] = *ins; + target.instructions[target.ins_count++] = *ins; } - if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) + if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0) goto fail; - vkd3d_free(cfg->program->instructions.elements); - cfg->program->instructions.elements = cfg->instructions; - cfg->program->instructions.capacity = cfg->ins_capacity; - cfg->program->instructions.count = cfg->ins_count; - cfg->program->temp_count = cfg->temp_count; + vkd3d_free(program->instructions.elements); + program->instructions.elements = target.instructions; + program->instructions.capacity = target.ins_capacity; + program->instructions.count = target.ins_count; + program->temp_count = target.temp_count; return VKD3D_OK; fail: - vkd3d_free(cfg->instructions); + vkd3d_free(target.instructions); + + return ret; +} + +static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, + struct vsir_block *block, struct vsir_block **origin_blocks) +{ + unsigned int i; + + if (!register_is_ssa(reg)) + return; + + i = reg->idx[0].offset; + if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) + alloc->table[i] = alloc->next_temp_idx++; + + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) + register_map_undominated_use(&reg->idx[i].rel_addr->reg, alloc, block, origin_blocks); +} + +/* Drivers are not necessarily optimised to handle very large numbers of
temps. For example, + * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. + * This can also result in the backend emitting less code because temps typically need an + * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all + * undominated SSA use, but structurisation may create new occurrences. */ +static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) +{ + struct vsir_program *program = cfg->program; + struct ssas_to_temps_alloc alloc = {0}; + struct vsir_block **origin_blocks; + unsigned int j; + size_t i; + + if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) + { + ERR("Failed to allocate origin block array.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) + { + vkd3d_free(origin_blocks); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->dst_count; ++j) + { + if (register_is_ssa(&ins->dst[j].reg)) + origin_blocks[ins->dst[j].reg.idx[0].offset] = block; + } + } + } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; + + for (ins = block->begin; ins <= block->end; ++ins) + { + for (j = 0; j < ins->src_count; ++j) + register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); + } + } + + if (alloc.next_temp_idx == program->temp_count) + goto done; + + TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); + + for (i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + for (j = 0; j < ins->dst_count; ++j) + 
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + } + + program->temp_count = alloc.next_temp_idx; +done: + vkd3d_free(origin_blocks); + vkd3d_free(alloc.table); + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_result ret; + struct vsir_cfg cfg; + + if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0) + return ret; + + vsir_cfg_compute_dominators(&cfg); + + ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); + + vsir_cfg_cleanup(&cfg); return ret; } @@ -5459,63 +5814,25 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t { enum vkd3d_result result = VKD3D_OK; - remove_dcl_temps(program); - - if ((result = vsir_program_lower_texkills(program)) < 0) + if ((result = vsir_program_lower_instructions(program)) < 0) return result; if (program->shader_version.major >= 6) { - struct vsir_cfg cfg; - - if ((result = lower_switch_to_if_ladder(program)) < 0) - return result; - - if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) - return result; - - if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) + if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) return result; - vsir_cfg_compute_dominators(&cfg); - - if ((result = vsir_cfg_compute_loops(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); + if ((result = lower_switch_to_if_ladder(program)) < 0) return result; - } - if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); + if ((result = vsir_program_structurize(program, message_context)) < 0) return result; - } - if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); + if ((result = 
vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) return result; - } - if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); + if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) return result; - } - - if ((result = vsir_cfg_optimize(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); - return result; - } - - if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); - return result; - } - - vsir_cfg_cleanup(&cfg); } else { @@ -5545,10 +5862,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) return result; - } - if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + } if (TRACE_ON()) vkd3d_shader_trace(program); diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 1cae2d7d9d4..dfab1cb229b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -4419,11 +4419,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp { unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, val_id; + uint32_t type_id, dst_type_id, val_id; + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); if (component_count > 1) { - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, component_ids, component_count); } @@ -4431,6 +4431,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp { val_id = *component_ids; } + + 
dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); + if (dst_type_id != type_id) + val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -7283,8 +7288,12 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, } general_implementation: - write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) - ? vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask; + write_mask = dst->write_mask; + if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_64_from_32(write_mask); + else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_32_from_64(write_mask); + val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); if (dst->reg.data_type != src->reg.data_type) { @@ -8895,8 +8904,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t base_coordinate_id, component_idx; - const struct vkd3d_shader_src_param *data; struct vkd3d_shader_register_info reg_info; + struct vkd3d_shader_src_param data; unsigned int component_count; if (!spirv_compiler_get_register_info(compiler, &dst->reg, ®_info)) @@ -8908,8 +8917,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - data = &src[instruction->src_count - 1]; - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); + data = src[instruction->src_count - 1]; + data.reg.data_type = VKD3D_DATA_UINT; + val_id = spirv_compiler_emit_load_src(compiler, &data, 
dst->write_mask); component_count = vsir_write_mask_component_count(dst->write_mask); for (component_idx = 0; component_idx < component_count; ++component_idx) @@ -9334,6 +9344,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lod_id, val_id, miplevel_count_id; + enum vkd3d_shader_component_type component_type; uint32_t constituents[VKD3D_VEC4_SIZE]; unsigned int i, size_component_count; struct vkd3d_shader_image image; @@ -9370,10 +9381,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, i + 2); + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (instruction->flags == VKD3DSI_RESINFO_UINT) { - val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + /* SSA registers must match the specified result type. 
*/ + if (!register_is_ssa(&dst->reg)) + val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); + else + component_type = VKD3D_SHADER_COMPONENT_UINT; } else { @@ -9382,7 +9399,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); } val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, - VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); + component_type, src[1].swizzle, dst->write_mask); spirv_compiler_emit_store_dst(compiler, dst, val_id); } diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 4d0658313d5..d5019a5dd63 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -3385,10 +3385,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) if (profile->major_version >= 5) { - put_u32(&buffer, TAG_RD11); + put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ @@ -3405,6 +3405,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0; + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); + if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED; @@ -3437,6 +3440,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) if (!cbuffer->reg.allocated) continue; + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); + if (cbuffer->reservation.reg_type) flags |= D3D_SIF_USERPACKED; @@ -5343,7 +5349,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex &expr->node, arg1, arg2); break; - case HLSL_OP3_MOVC: + case HLSL_OP3_TERNARY: write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); break; @@ -5399,7 +5405,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju case HLSL_IR_JUMP_DISCARD_NZ: { - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + instr.opcode = VKD3D_SM4_OP_DISCARD; + instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); instr.src_count = 1; @@ -5700,19 +5707,13 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc { if (instr->data_type) { - if (instr->data_type->class == HLSL_CLASS_MATRIX) - { - hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); - break; - } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) { - 
hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); + hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", + instr->data_type->class); break; } - assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); - if (!instr->reg.allocated) { assert(instr->type == HLSL_IR_CONSTANT); @@ -5808,13 +5809,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) + { + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); + write_sm4_dcl_constant_buffer(&tpf, cbuffer); + } } for (i = 0; i < extern_resources_count; ++i) { const struct extern_resource *resource = &extern_resources[i]; + if (hlsl_version_ge(ctx, 5, 1)) + hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); + if (resource->regset == HLSL_REGSET_SAMPLERS) write_sm4_dcl_samplers(&tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 6d442cd517d..9b37bbef70b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ return compacted_swizzle; } +static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) +{ + static const unsigned int swizzles[16] = + { + 0, + VKD3D_SHADER_SWIZZLE(X, X, X, X), + VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), + VKD3D_SHADER_SWIZZLE(X, Y, X, X), + VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), + VKD3D_SHADER_SWIZZLE(X, Z, X, X), + VKD3D_SHADER_SWIZZLE(Y, Z, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, X), + VKD3D_SHADER_SWIZZLE(W, W, W, W), + VKD3D_SHADER_SWIZZLE(X, W, X, X), + VKD3D_SHADER_SWIZZLE(Y, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, W, X), + 
VKD3D_SHADER_SWIZZLE(Z, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Z, W, X), + VKD3D_SHADER_SWIZZLE(Y, Z, W, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, W), + }; + + return swizzles[writemask & 0xf]; +} + struct vkd3d_struct { enum vkd3d_shader_structure_type type; diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c new file mode 100644 index 00000000000..56ba6990420 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d/cache.c @@ -0,0 +1,59 @@ +/* + * Copyright 2024 Stefan Dösinger for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_private.h" + +struct vkd3d_shader_cache +{ + unsigned int refcount; +}; + +int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) +{ + struct vkd3d_shader_cache *object; + + TRACE("%p.\n", cache); + + object = vkd3d_malloc(sizeof(*object)); + if (!object) + return VKD3D_ERROR_OUT_OF_MEMORY; + + object->refcount = 1; + *cache = object; + + return VKD3D_OK; +} + +unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) +{ + unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount); + TRACE("cache %p refcount %u.\n", cache, refcount); + return refcount; +} + +unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) +{ + unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); + TRACE("cache %p refcount %u.\n", cache, refcount); + + if (refcount) + return refcount; + + vkd3d_free(cache); + return 0; +} diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 65db8b70bfd..90de27c53b6 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -2529,11 +2529,17 @@ struct d3d12_cache_session ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; unsigned int refcount; + struct list cache_list_entry; + struct d3d12_device *device; struct vkd3d_private_store private_store; D3D12_SHADER_CACHE_SESSION_DESC desc; + struct vkd3d_shader_cache *cache; }; +static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER; +static struct list cache_list = LIST_INIT(cache_list); + static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) { return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface); @@ -2582,6 +2588,11 @@ static void 
d3d12_cache_session_destroy(struct d3d12_cache_session *session) TRACE("Destroying cache session %p.\n", session); + vkd3d_mutex_lock(&cache_list_mutex); + list_remove(&session->cache_list_entry); + vkd3d_mutex_unlock(&cache_list_mutex); + + vkd3d_shader_cache_decref(session->cache); vkd3d_private_store_destroy(&session->private_store); vkd3d_free(session); @@ -2707,11 +2718,14 @@ static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) { + struct d3d12_cache_session *i; + enum vkd3d_result ret; HRESULT hr; session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; session->refcount = 1; session->desc = *desc; + session->cache = NULL; if (!session->desc.MaximumValueFileSizeBytes) session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; @@ -2723,9 +2737,56 @@ static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) return hr; + vkd3d_mutex_lock(&cache_list_mutex); + + /* We expect the number of open caches to be small. 
*/ + LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry) + { + if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier))) + { + TRACE("Found an existing cache %p from session %p.\n", i->cache, i); + if (desc->Version == i->desc.Version) + { + session->desc = i->desc; + vkd3d_shader_cache_incref(session->cache = i->cache); + break; + } + else + { + WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n", + i->desc.Version, desc->Version); + hr = DXGI_ERROR_ALREADY_EXISTS; + goto error; + } + } + } + + if (!session->cache) + { + if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK) + FIXME("Disk caches are not yet implemented.\n"); + + ret = vkd3d_shader_open_cache(&session->cache); + if (ret) + { + WARN("Failed to open shader cache.\n"); + hr = hresult_from_vkd3d_result(ret); + goto error; + } + } + + /* Add it to the list even if we reused an existing cache. The other session might be destroyed, + * but the cache stays alive and can be opened a third time. 
*/ + list_add_tail(&cache_list, &session->cache_list_entry); d3d12_device_add_ref(session->device = device); + vkd3d_mutex_unlock(&cache_list_mutex); return S_OK; + +error: + vkd3d_private_store_destroy(&session->private_store); + vkd3d_mutex_unlock(&cache_list_mutex); + return hr; } /* ID3D12Device */ @@ -4874,6 +4935,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Dev WARN("No output pointer, returning S_FALSE.\n"); return S_FALSE; } + *session = NULL; if (!(object = vkd3d_malloc(sizeof(*object)))) return E_OUTOFMEMORY; diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index b83a45d0606..179999148bc 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -1893,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 WARN("Invalid sample count 0.\n"); return E_INVALIDARG; } + if (desc->SampleDesc.Count > 1 + && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) + { + WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n", + desc->SampleDesc.Count); + return E_INVALIDARG; + } if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) { @@ -1996,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 WARN("Invalid initial resource state %#x.\n", initial_state); return E_INVALIDARG; } + if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) + { + WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state); + return E_INVALIDARG; + } if (optimized_clear_value && d3d12_resource_is_buffer(resource)) { diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 08cc110e8f7..b8328216a29 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -2045,6 +2045,9 @@ static ULONG 
STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); + vkd3d_free(state); d3d12_device_release(device); @@ -2413,8 +2416,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_shader_interface_info shader_interface; struct vkd3d_shader_descriptor_offset_info offset_info; - const struct d3d12_root_signature *root_signature; struct vkd3d_shader_spirv_target_info target_info; + struct d3d12_root_signature *root_signature; VkPipelineLayout vk_pipeline_layout; HRESULT hr; @@ -2425,13 +2428,27 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) { - WARN("Root signature is NULL.\n"); - return E_INVALIDARG; + TRACE("Root signature is NULL, looking for an embedded signature.\n"); + if (FAILED(hr = d3d12_root_signature_create(device, + desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature))) + { + WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr)); + return hr; + } + state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface; + } + else + { + state->implicit_root_signature = NULL; } if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, &desc->cs, VK_SHADER_STAGE_COMPUTE_BIT))) + { + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); return hr; + } memset(&target_info, 0, sizeof(target_info)); target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; @@ -2476,6 +2493,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { WARN("Failed to create Vulkan compute pipeline, hr %s.\n", 
debugstr_hresult(hr)); d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); return hr; } @@ -2483,6 +2502,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + if (state->implicit_root_signature) + d3d12_root_signature_Release(state->implicit_root_signature); return hr; } @@ -3484,6 +3505,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + state->implicit_root_signature = NULL; d3d12_device_add_ref(state->device = device); return S_OK; diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 14c8eb54574..9f5f759667a 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -1213,6 +1213,7 @@ struct d3d12_pipeline_state struct d3d12_pipeline_uav_counter_state uav_counters; + ID3D12RootSignature *implicit_root_signature; struct d3d12_device *device; struct vkd3d_private_store private_store; @@ -1927,4 +1928,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) vkd3d_header->next = vkd3d_structure; } +struct vkd3d_shader_cache; + +int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); +unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); +unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); + #endif /* __VKD3D_PRIVATE_H */ -- 2.43.0