wine-staging/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch
2024-04-17 09:21:44 +10:00

5532 lines
219 KiB
Diff

From 79df439b6169fac8d43c95411edbad8ee5a2c482 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Tue, 16 Apr 2024 12:05:29 +1000
Subject: [PATCH] Updated vkd3d to 7d6f0f2592a8aedf749c2dff36ea330e9ccb49d1.
---
libs/vkd3d/Makefile.in | 1 +
libs/vkd3d/include/private/vkd3d_common.h | 6 +
libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 528 +++---
libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 135 +-
libs/vkd3d/libs/vkd3d-shader/dxil.c | 158 ++
libs/vkd3d/libs/vkd3d-shader/fx.c | 150 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 104 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 77 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 495 ++++--
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 134 +-
.../libs/vkd3d-shader/hlsl_constant_ops.c | 23 +-
libs/vkd3d/libs/vkd3d-shader/ir.c | 1451 ++++++++++-------
libs/vkd3d/libs/vkd3d-shader/spirv.c | 35 +-
libs/vkd3d/libs/vkd3d-shader/tpf.c | 35 +-
.../libs/vkd3d-shader/vkd3d_shader_private.h | 25 +
libs/vkd3d/libs/vkd3d/cache.c | 59 +
libs/vkd3d/libs/vkd3d/device.c | 62 +
libs/vkd3d/libs/vkd3d/resource.c | 12 +
libs/vkd3d/libs/vkd3d/state.c | 28 +-
libs/vkd3d/libs/vkd3d/vkd3d_private.h | 7 +
20 files changed, 2329 insertions(+), 1196 deletions(-)
create mode 100644 libs/vkd3d/libs/vkd3d/cache.c
diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in
index 448e9a0e61d..94e4833dc9a 100644
--- a/libs/vkd3d/Makefile.in
+++ b/libs/vkd3d/Makefile.in
@@ -30,6 +30,7 @@ SOURCES = \
libs/vkd3d-shader/spirv.c \
libs/vkd3d-shader/tpf.c \
libs/vkd3d-shader/vkd3d_shader_main.c \
+ libs/vkd3d/cache.c \
libs/vkd3d/command.c \
libs/vkd3d/device.c \
libs/vkd3d/resource.c \
diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h
index 1cc8ecc38f3..f9df47d339c 100644
--- a/libs/vkd3d/include/private/vkd3d_common.h
+++ b/libs/vkd3d/include/private/vkd3d_common.h
@@ -438,6 +438,12 @@ struct vkd3d_mutex
#endif
};
+#ifdef _WIN32
+#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}}
+#else
+#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+
static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock)
{
#ifdef _WIN32
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
index 2b5feb94103..459fdfc9abf 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
@@ -330,37 +330,6 @@ static const char * const shader_opcode_names[] =
[VKD3DSIH_XOR ] = "xor",
};
-static const struct
-{
- enum vkd3d_shader_input_sysval_semantic sysval_semantic;
- const char *sysval_name;
-}
-shader_input_sysval_semantic_names[] =
-{
- {VKD3D_SIV_POSITION, "position"},
- {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"},
- {VKD3D_SIV_CULL_DISTANCE, "cull_distance"},
- {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"},
- {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"},
- {VKD3D_SIV_VERTEX_ID, "vertex_id"},
- {VKD3D_SIV_INSTANCE_ID, "instance_id"},
- {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"},
- {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"},
- {VKD3D_SIV_SAMPLE_INDEX, "sample_index"},
- {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"},
- {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"},
- {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"},
- {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"},
- {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"},
- {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"},
- {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"},
- {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"},
- {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"},
- {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"},
- {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"},
- {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"},
-};
-
struct vkd3d_d3d_asm_colours
{
const char *reset;
@@ -615,21 +584,54 @@ static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler
vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix);
}
-static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler,
- enum vkd3d_shader_input_sysval_semantic semantic)
+static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler,
+ const char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix)
{
unsigned int i;
+ static const struct
+ {
+ enum vkd3d_shader_input_sysval_semantic sysval_semantic;
+ const char *sysval_name;
+ }
+ shader_input_sysval_semantic_names[] =
+ {
+ {VKD3D_SIV_POSITION, "position"},
+ {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"},
+ {VKD3D_SIV_CULL_DISTANCE, "cull_distance"},
+ {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"},
+ {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"},
+ {VKD3D_SIV_VERTEX_ID, "vertex_id"},
+ {VKD3D_SIV_INSTANCE_ID, "instance_id"},
+ {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"},
+ {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"},
+ {VKD3D_SIV_SAMPLE_INDEX, "sample_index"},
+ {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"},
+ {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"},
+ {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"},
+ {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"},
+ {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"},
+ {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"},
+ {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"},
+ {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"},
+ {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"},
+ {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"},
+ {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"},
+ {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"},
+ };
+
for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i)
{
- if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic)
- {
- vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name);
- return;
- }
+ if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic)
+ continue;
+
+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s",
+ prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix);
+ return;
}
- vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic);
+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s<unhandled input sysval semantic %#x>%s%s",
+ prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix);
}
static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type)
@@ -704,124 +706,129 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil
vkd3d_string_buffer_printf(&compiler->buffer, ")");
}
-static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler,
- const struct vkd3d_shader_semantic *semantic, uint32_t flags)
+static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler,
+ const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix)
{
struct vkd3d_string_buffer *buffer = &compiler->buffer;
+ unsigned int usage_idx;
+ const char *usage;
+ bool indexed;
if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER)
{
switch (semantic->resource_type)
{
case VKD3D_SHADER_RESOURCE_TEXTURE_2D:
- shader_addline(buffer, "_2d");
+ usage = "2d";
break;
-
case VKD3D_SHADER_RESOURCE_TEXTURE_3D:
- shader_addline(buffer, "_volume");
+ usage = "volume";
break;
-
case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE:
- shader_addline(buffer, "_cube");
+ usage = "cube";
break;
-
default:
- shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type);
- break;
+ vkd3d_string_buffer_printf(buffer, "%s%s<unhandled resource type %#x>%s%s",
+ prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix);
+ return;
}
+
+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix);
+ return;
}
- else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV)
+
+ if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV)
{
+ vkd3d_string_buffer_printf(buffer, "%s", prefix);
if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE)
- shader_addline(buffer, "_resource");
+ vkd3d_string_buffer_printf(buffer, "resource_");
- shader_addline(buffer, "_");
shader_dump_resource_type(compiler, semantic->resource_type);
if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS
|| semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY)
{
- shader_addline(buffer, "(%u)", semantic->sample_count);
+ vkd3d_string_buffer_printf(buffer, "(%u)", semantic->sample_count);
}
if (semantic->resource.reg.reg.type == VKD3DSPR_UAV)
shader_dump_uav_flags(compiler, flags);
- shader_addline(buffer, " ");
+ vkd3d_string_buffer_printf(buffer, " ");
shader_dump_resource_data_type(compiler, semantic->resource_data_type);
+ vkd3d_string_buffer_printf(buffer, "%s", suffix);
+ return;
}
- else
- {
- /* Pixel shaders 3.0 don't have usage semantics. */
- if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)
- && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL)
- return;
- else
- shader_addline(buffer, "_");
-
- switch (semantic->usage)
- {
- case VKD3D_DECL_USAGE_POSITION:
- shader_addline(buffer, "position%u", semantic->usage_idx);
- break;
-
- case VKD3D_DECL_USAGE_BLEND_INDICES:
- shader_addline(buffer, "blend");
- break;
-
- case VKD3D_DECL_USAGE_BLEND_WEIGHT:
- shader_addline(buffer, "weight");
- break;
-
- case VKD3D_DECL_USAGE_NORMAL:
- shader_addline(buffer, "normal%u", semantic->usage_idx);
- break;
-
- case VKD3D_DECL_USAGE_PSIZE:
- shader_addline(buffer, "psize");
- break;
-
- case VKD3D_DECL_USAGE_COLOR:
- if (!semantic->usage_idx)
- shader_addline(buffer, "color");
- else
- shader_addline(buffer, "specular%u", (semantic->usage_idx - 1));
- break;
-
- case VKD3D_DECL_USAGE_TEXCOORD:
- shader_addline(buffer, "texcoord%u", semantic->usage_idx);
- break;
-
- case VKD3D_DECL_USAGE_TANGENT:
- shader_addline(buffer, "tangent");
- break;
-
- case VKD3D_DECL_USAGE_BINORMAL:
- shader_addline(buffer, "binormal");
- break;
-
- case VKD3D_DECL_USAGE_TESS_FACTOR:
- shader_addline(buffer, "tessfactor");
- break;
-
- case VKD3D_DECL_USAGE_POSITIONT:
- shader_addline(buffer, "positionT%u", semantic->usage_idx);
- break;
-
- case VKD3D_DECL_USAGE_FOG:
- shader_addline(buffer, "fog");
- break;
- case VKD3D_DECL_USAGE_DEPTH:
- shader_addline(buffer, "depth");
- break;
+ /* Pixel shaders 3.0 don't have usage semantics. */
+ if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)
+ && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL)
+ return;
- case VKD3D_DECL_USAGE_SAMPLE:
- shader_addline(buffer, "sample");
+ indexed = false;
+ usage_idx = semantic->usage_idx;
+ switch (semantic->usage)
+ {
+ case VKD3D_DECL_USAGE_POSITION:
+ usage = "position";
+ indexed = true;
+ break;
+ case VKD3D_DECL_USAGE_BLEND_INDICES:
+ usage = "blend";
+ break;
+ case VKD3D_DECL_USAGE_BLEND_WEIGHT:
+ usage = "weight";
+ break;
+ case VKD3D_DECL_USAGE_NORMAL:
+ usage = "normal";
+ indexed = true;
+ break;
+ case VKD3D_DECL_USAGE_PSIZE:
+ usage = "psize";
+ break;
+ case VKD3D_DECL_USAGE_COLOR:
+ if (semantic->usage_idx)
+ {
+ usage = "specular";
+ indexed = true;
+ --usage_idx;
break;
-
- default:
- shader_addline(buffer, "<unknown_semantic(%#x)>", semantic->usage);
- FIXME("Unrecognised semantic usage %#x.\n", semantic->usage);
- }
+ }
+ usage = "color";
+ break;
+ case VKD3D_DECL_USAGE_TEXCOORD:
+ usage = "texcoord";
+ indexed = true;
+ break;
+ case VKD3D_DECL_USAGE_TANGENT:
+ usage = "tangent";
+ break;
+ case VKD3D_DECL_USAGE_BINORMAL:
+ usage = "binormal";
+ break;
+ case VKD3D_DECL_USAGE_TESS_FACTOR:
+ usage = "tessfactor";
+ break;
+ case VKD3D_DECL_USAGE_POSITIONT:
+ usage = "positionT";
+ indexed = true;
+ break;
+ case VKD3D_DECL_USAGE_FOG:
+ usage = "fog";
+ break;
+ case VKD3D_DECL_USAGE_DEPTH:
+ usage = "depth";
+ break;
+ case VKD3D_DECL_USAGE_SAMPLE:
+ usage = "sample";
+ break;
+ default:
+ vkd3d_string_buffer_printf(buffer, "%s%s<unhandled usage %#x, index %u>%s%s",
+ prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix);
+ return;
}
+
+ if (indexed)
+ vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix);
+ else
+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix);
}
static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler,
@@ -937,8 +944,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler
vkd3d_string_buffer_printf(&compiler->buffer, "*]");
}
-static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg,
- bool is_declaration)
+static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix,
+ const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix)
{
struct vkd3d_string_buffer *buffer = &compiler->buffer;
unsigned int offset = reg->idx[0].offset;
@@ -947,22 +954,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"};
static const char * const misctype_reg_names[] = {"vPos", "vFace"};
- shader_addline(buffer, "%s", reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg);
+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix,
+ reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg);
switch (reg->type)
{
case VKD3DSPR_TEMP:
- shader_addline(buffer, "r");
+ vkd3d_string_buffer_printf(buffer, "r");
break;
case VKD3DSPR_INPUT:
- shader_addline(buffer, "v");
+ vkd3d_string_buffer_printf(buffer, "v");
break;
case VKD3DSPR_CONST:
case VKD3DSPR_CONST2:
case VKD3DSPR_CONST3:
case VKD3DSPR_CONST4:
- shader_addline(buffer, "c");
+ vkd3d_string_buffer_printf(buffer, "c");
offset = shader_get_float_offset(reg->type, offset);
break;
@@ -972,205 +980,202 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
break;
case VKD3DSPR_RASTOUT:
- shader_addline(buffer, "%s", rastout_reg_names[offset]);
+ vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]);
break;
case VKD3DSPR_COLOROUT:
- shader_addline(buffer, "oC");
+ vkd3d_string_buffer_printf(buffer, "oC");
break;
case VKD3DSPR_DEPTHOUT:
- shader_addline(buffer, "oDepth");
+ vkd3d_string_buffer_printf(buffer, "oDepth");
break;
case VKD3DSPR_DEPTHOUTGE:
- shader_addline(buffer, "oDepthGE");
+ vkd3d_string_buffer_printf(buffer, "oDepthGE");
break;
case VKD3DSPR_DEPTHOUTLE:
- shader_addline(buffer, "oDepthLE");
+ vkd3d_string_buffer_printf(buffer, "oDepthLE");
break;
case VKD3DSPR_ATTROUT:
- shader_addline(buffer, "oD");
+ vkd3d_string_buffer_printf(buffer, "oD");
break;
case VKD3DSPR_TEXCRDOUT:
/* Vertex shaders >= 3.0 use general purpose output registers
* (VKD3DSPR_OUTPUT), which can include an address token. */
if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0))
- shader_addline(buffer, "o");
+ vkd3d_string_buffer_printf(buffer, "o");
else
- shader_addline(buffer, "oT");
+ vkd3d_string_buffer_printf(buffer, "oT");
break;
case VKD3DSPR_CONSTINT:
- shader_addline(buffer, "i");
+ vkd3d_string_buffer_printf(buffer, "i");
break;
case VKD3DSPR_CONSTBOOL:
- shader_addline(buffer, "b");
+ vkd3d_string_buffer_printf(buffer, "b");
break;
case VKD3DSPR_LABEL:
- shader_addline(buffer, "l");
+ vkd3d_string_buffer_printf(buffer, "l");
break;
case VKD3DSPR_LOOP:
- shader_addline(buffer, "aL");
+ vkd3d_string_buffer_printf(buffer, "aL");
break;
case VKD3DSPR_COMBINED_SAMPLER:
case VKD3DSPR_SAMPLER:
- shader_addline(buffer, "s");
+ vkd3d_string_buffer_printf(buffer, "s");
is_descriptor = true;
break;
case VKD3DSPR_MISCTYPE:
if (offset > 1)
- {
- FIXME("Unhandled misctype register %u.\n", offset);
- shader_addline(buffer, "<unhandled misctype %#x>", offset);
- }
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled misctype %#x>%s",
+ compiler->colours.error, offset, compiler->colours.reset);
else
- {
- shader_addline(buffer, "%s", misctype_reg_names[offset]);
- }
+ vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]);
break;
case VKD3DSPR_PREDICATE:
- shader_addline(buffer, "p");
+ vkd3d_string_buffer_printf(buffer, "p");
break;
case VKD3DSPR_IMMCONST:
- shader_addline(buffer, "l");
+ vkd3d_string_buffer_printf(buffer, "l");
break;
case VKD3DSPR_IMMCONST64:
- shader_addline(buffer, "d");
+ vkd3d_string_buffer_printf(buffer, "d");
break;
case VKD3DSPR_CONSTBUFFER:
- shader_addline(buffer, "cb");
+ vkd3d_string_buffer_printf(buffer, "cb");
is_descriptor = true;
break;
case VKD3DSPR_IMMCONSTBUFFER:
- shader_addline(buffer, "icb");
+ vkd3d_string_buffer_printf(buffer, "icb");
break;
case VKD3DSPR_PRIMID:
- shader_addline(buffer, "primID");
+ vkd3d_string_buffer_printf(buffer, "primID");
break;
case VKD3DSPR_NULL:
- shader_addline(buffer, "null");
+ vkd3d_string_buffer_printf(buffer, "null");
break;
case VKD3DSPR_RASTERIZER:
- shader_addline(buffer, "rasterizer");
+ vkd3d_string_buffer_printf(buffer, "rasterizer");
break;
case VKD3DSPR_RESOURCE:
- shader_addline(buffer, "t");
+ vkd3d_string_buffer_printf(buffer, "t");
is_descriptor = true;
break;
case VKD3DSPR_UAV:
- shader_addline(buffer, "u");
+ vkd3d_string_buffer_printf(buffer, "u");
is_descriptor = true;
break;
case VKD3DSPR_OUTPOINTID:
- shader_addline(buffer, "vOutputControlPointID");
+ vkd3d_string_buffer_printf(buffer, "vOutputControlPointID");
break;
case VKD3DSPR_FORKINSTID:
- shader_addline(buffer, "vForkInstanceId");
+ vkd3d_string_buffer_printf(buffer, "vForkInstanceId");
break;
case VKD3DSPR_JOININSTID:
- shader_addline(buffer, "vJoinInstanceId");
+ vkd3d_string_buffer_printf(buffer, "vJoinInstanceId");
break;
case VKD3DSPR_INCONTROLPOINT:
- shader_addline(buffer, "vicp");
+ vkd3d_string_buffer_printf(buffer, "vicp");
break;
case VKD3DSPR_OUTCONTROLPOINT:
- shader_addline(buffer, "vocp");
+ vkd3d_string_buffer_printf(buffer, "vocp");
break;
case VKD3DSPR_PATCHCONST:
- shader_addline(buffer, "vpc");
+ vkd3d_string_buffer_printf(buffer, "vpc");
break;
case VKD3DSPR_TESSCOORD:
- shader_addline(buffer, "vDomainLocation");
+ vkd3d_string_buffer_printf(buffer, "vDomainLocation");
break;
case VKD3DSPR_GROUPSHAREDMEM:
- shader_addline(buffer, "g");
+ vkd3d_string_buffer_printf(buffer, "g");
break;
case VKD3DSPR_THREADID:
- shader_addline(buffer, "vThreadID");
+ vkd3d_string_buffer_printf(buffer, "vThreadID");
break;
case VKD3DSPR_THREADGROUPID:
- shader_addline(buffer, "vThreadGroupID");
+ vkd3d_string_buffer_printf(buffer, "vThreadGroupID");
break;
case VKD3DSPR_LOCALTHREADID:
- shader_addline(buffer, "vThreadIDInGroup");
+ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup");
break;
case VKD3DSPR_LOCALTHREADINDEX:
- shader_addline(buffer, "vThreadIDInGroupFlattened");
+ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened");
break;
case VKD3DSPR_IDXTEMP:
- shader_addline(buffer, "x");
+ vkd3d_string_buffer_printf(buffer, "x");
break;
case VKD3DSPR_STREAM:
- shader_addline(buffer, "m");
+ vkd3d_string_buffer_printf(buffer, "m");
break;
case VKD3DSPR_FUNCTIONBODY:
- shader_addline(buffer, "fb");
+ vkd3d_string_buffer_printf(buffer, "fb");
break;
case VKD3DSPR_FUNCTIONPOINTER:
- shader_addline(buffer, "fp");
+ vkd3d_string_buffer_printf(buffer, "fp");
break;
case VKD3DSPR_COVERAGE:
- shader_addline(buffer, "vCoverage");
+ vkd3d_string_buffer_printf(buffer, "vCoverage");
break;
case VKD3DSPR_SAMPLEMASK:
- shader_addline(buffer, "oMask");
+ vkd3d_string_buffer_printf(buffer, "oMask");
break;
case VKD3DSPR_GSINSTID:
- shader_addline(buffer, "vGSInstanceID");
+ vkd3d_string_buffer_printf(buffer, "vGSInstanceID");
break;
case VKD3DSPR_OUTSTENCILREF:
- shader_addline(buffer, "oStencilRef");
+ vkd3d_string_buffer_printf(buffer, "oStencilRef");
break;
case VKD3DSPR_UNDEF:
- shader_addline(buffer, "undef");
+ vkd3d_string_buffer_printf(buffer, "undef");
break;
case VKD3DSPR_SSA:
- shader_addline(buffer, "sr");
+ vkd3d_string_buffer_printf(buffer, "sr");
break;
default:
- shader_addline(buffer, "<unhandled_rtype(%#x)>", reg->type);
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled register type %#x>%s",
+ compiler->colours.error, reg->type, compiler->colours.reset);
break;
}
@@ -1189,7 +1194,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
break;
}
- shader_addline(buffer, "%s(", compiler->colours.reset);
+ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset);
switch (reg->dimension)
{
case VSIR_DIMENSION_SCALAR:
@@ -1210,7 +1215,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], "");
break;
default:
- shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled data type %#x>%s",
+ compiler->colours.error, reg->data_type, compiler->colours.reset);
break;
}
break;
@@ -1249,20 +1255,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], "");
break;
default:
- shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled data type %#x>%s",
+ compiler->colours.error, reg->data_type, compiler->colours.reset);
break;
}
break;
default:
- shader_addline(buffer, "<unhandled immconst dimension %#x>", reg->dimension);
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled immconst dimension %#x>%s",
+ compiler->colours.error, reg->dimension, compiler->colours.reset);
break;
}
- shader_addline(buffer, ")");
+ vkd3d_string_buffer_printf(buffer, ")");
}
else if (reg->type == VKD3DSPR_IMMCONST64)
{
- shader_addline(buffer, "%s(", compiler->colours.reset);
+ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset);
/* A double2 vector is treated as a float4 vector in enum vsir_dimension. */
if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4)
{
@@ -1280,14 +1288,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
}
else
{
- shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled data type %#x>%s",
+ compiler->colours.error, reg->data_type, compiler->colours.reset);
}
}
else
{
- shader_addline(buffer, "<unhandled immconst64 dimension %#x>", reg->dimension);
+ vkd3d_string_buffer_printf(buffer, "%s<unhandled immconst64 dimension %#x>%s",
+ compiler->colours.error, reg->dimension, compiler->colours.reset);
}
- shader_addline(buffer, ")");
+ vkd3d_string_buffer_printf(buffer, ")");
}
else if (reg->type != VKD3DSPR_RASTOUT
&& reg->type != VKD3DSPR_MISCTYPE
@@ -1331,7 +1341,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
}
else
{
- shader_addline(buffer, "%s", compiler->colours.reset);
+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset);
}
if (reg->type == VKD3DSPR_FUNCTIONPOINTER)
@@ -1339,8 +1349,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const
}
else
{
- shader_addline(buffer, "%s", compiler->colours.reset);
+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset);
}
+ vkd3d_string_buffer_printf(buffer, "%s", suffix);
}
static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg)
@@ -1384,8 +1395,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co
compiler->colours.modifier, compiler->colours.reset);
}
-static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler,
- const struct vkd3d_shader_register *reg)
+static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler,
+ const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix)
{
static const char *dimensions[] =
{
@@ -1398,7 +1409,10 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler,
const char *dimension;
if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES))
+ {
+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix);
return;
+ }
if (reg->data_type == VKD3D_DATA_UNUSED)
return;
@@ -1408,9 +1422,9 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler,
else
dimension = "??";
- shader_addline(buffer, " <%s", dimension);
+ vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension);
shader_dump_data_type(compiler, reg->data_type);
- shader_addline(buffer, ">");
+ vkd3d_string_buffer_printf(buffer, ">%s", suffix);
}
static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler,
@@ -1444,8 +1458,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler,
{
uint32_t write_mask = param->write_mask;
- vkd3d_string_buffer_printf(&compiler->buffer, "%s", prefix);
- shader_dump_register(compiler, &param->reg, is_declaration);
+ shader_print_register(compiler, prefix, &param->reg, is_declaration, "");
if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4)
{
@@ -1457,8 +1470,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler,
shader_print_precision(compiler, &param->reg);
shader_print_non_uniform(compiler, &param->reg);
- shader_dump_reg_type(compiler, &param->reg);
- vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffix);
+ shader_print_reg_type(compiler, "", &param->reg, suffix);
}
static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler,
@@ -1468,6 +1480,7 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler,
struct vkd3d_string_buffer *buffer = &compiler->buffer;
uint32_t swizzle = param->swizzle;
const char *modifier = "";
+ bool is_abs = false;
if (src_modifier == VKD3DSPSM_NEG
|| src_modifier == VKD3DSPSM_BIASNEG
@@ -1482,9 +1495,9 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler,
vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier);
if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG)
- vkd3d_string_buffer_printf(buffer, "|");
+ is_abs = true;
- shader_dump_register(compiler, &param->reg, false);
+ shader_print_register(compiler, is_abs ? "|" : "", &param->reg, false, "");
switch (src_modifier)
{
@@ -1543,13 +1556,12 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler,
swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset);
}
- if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG)
+ if (is_abs)
vkd3d_string_buffer_printf(buffer, "|");
shader_print_precision(compiler, &param->reg);
shader_print_non_uniform(compiler, &param->reg);
- shader_dump_reg_type(compiler, &param->reg);
- vkd3d_string_buffer_printf(buffer, "%s", suffix);
+ shader_print_reg_type(compiler, "", &param->reg, suffix);
}
static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler,
@@ -1578,87 +1590,93 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler,
if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask);
}
-static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler,
- const struct vkd3d_shader_primitive_type *primitive_type)
+static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler,
+ const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix)
{
struct vkd3d_string_buffer *buffer = &compiler->buffer;
+ const char *primitive_type;
- switch (primitive_type->type)
+ switch (p->type)
{
case VKD3D_PT_UNDEFINED:
- shader_addline(buffer, "undefined");
+ primitive_type = "undefined";
break;
case VKD3D_PT_POINTLIST:
- shader_addline(buffer, "pointlist");
+ primitive_type = "pointlist";
break;
case VKD3D_PT_LINELIST:
- shader_addline(buffer, "linelist");
+ primitive_type = "linelist";
break;
case VKD3D_PT_LINESTRIP:
- shader_addline(buffer, "linestrip");
+ primitive_type = "linestrip";
break;
case VKD3D_PT_TRIANGLELIST:
- shader_addline(buffer, "trianglelist");
+ primitive_type = "trianglelist";
break;
case VKD3D_PT_TRIANGLESTRIP:
- shader_addline(buffer, "trianglestrip");
+ primitive_type = "trianglestrip";
break;
case VKD3D_PT_TRIANGLEFAN:
- shader_addline(buffer, "trianglefan");
+ primitive_type = "trianglefan";
break;
case VKD3D_PT_LINELIST_ADJ:
- shader_addline(buffer, "linelist_adj");
+ primitive_type = "linelist_adj";
break;
case VKD3D_PT_LINESTRIP_ADJ:
- shader_addline(buffer, "linestrip_adj");
+ primitive_type = "linestrip_adj";
break;
case VKD3D_PT_TRIANGLELIST_ADJ:
- shader_addline(buffer, "trianglelist_adj");
+ primitive_type = "trianglelist_adj";
break;
case VKD3D_PT_TRIANGLESTRIP_ADJ:
- shader_addline(buffer, "trianglestrip_adj");
+ primitive_type = "trianglestrip_adj";
break;
case VKD3D_PT_PATCH:
- shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count);
- break;
+ vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix);
+ return;
default:
- shader_addline(buffer, "<unrecognized_primitive_type %#x>", primitive_type->type);
- break;
+ vkd3d_string_buffer_printf(buffer, "%s%s<unhandled primitive type %#x>%s%s",
+ prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix);
+ return;
}
+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix);
}
-static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler,
- enum vkd3d_shader_interpolation_mode interpolation_mode)
+static void shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler,
+ const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix)
{
struct vkd3d_string_buffer *buffer = &compiler->buffer;
+ const char *mode;
- switch (interpolation_mode)
+ switch (m)
{
case VKD3DSIM_CONSTANT:
- shader_addline(buffer, "constant");
+ mode = "constant";
break;
case VKD3DSIM_LINEAR:
- shader_addline(buffer, "linear");
+ mode = "linear";
break;
case VKD3DSIM_LINEAR_CENTROID:
- shader_addline(buffer, "linear centroid");
+ mode = "linear centroid";
break;
case VKD3DSIM_LINEAR_NOPERSPECTIVE:
- shader_addline(buffer, "linear noperspective");
+ mode = "linear noperspective";
break;
case VKD3DSIM_LINEAR_SAMPLE:
- shader_addline(buffer, "linear sample");
+ mode = "linear sample";
break;
case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID:
- shader_addline(buffer, "linear noperspective centroid");
+ mode = "linear noperspective centroid";
break;
case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE:
- shader_addline(buffer, "linear noperspective sample");
+ mode = "linear noperspective sample";
break;
default:
- shader_addline(buffer, "<unrecognized_interpolation_mode %#x>", interpolation_mode);
- break;
+ vkd3d_string_buffer_printf(buffer, "%s%s<unhandled interpolation mode %#x>%s%s",
+ prefix, compiler->colours.error, m, compiler->colours.reset, suffix);
+ return;
}
+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix);
}
const char *shader_get_type_prefix(enum vkd3d_shader_type type)
@@ -1849,16 +1867,15 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
case VKD3DSIH_DCL:
case VKD3DSIH_DCL_UAV_TYPED:
vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode);
- shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags);
+ shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, "");
shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg);
- vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset);
- shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true);
+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset);
+ shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, "");
shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space);
break;
case VKD3DSIH_DCL_CONSTANT_BUFFER:
- vkd3d_string_buffer_printf(buffer, " ");
- shader_dump_register(compiler, &ins->declaration.cb.src.reg, true);
+ shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, "");
if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0))
shader_print_subscript(compiler, ins->declaration.cb.size, NULL);
else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1))
@@ -1906,8 +1923,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
break;
case VKD3DSIH_DCL_INPUT_PS:
- vkd3d_string_buffer_printf(buffer, " ");
- shader_dump_interpolation_mode(compiler, ins->flags);
+ shader_print_interpolation_mode(compiler, " ", ins->flags, "");
shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, "");
break;
@@ -1916,16 +1932,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
case VKD3DSIH_DCL_INPUT_SIV:
case VKD3DSIH_DCL_OUTPUT_SIV:
shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, "");
- shader_addline(buffer, ", ");
- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic);
+ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, "");
break;
case VKD3DSIH_DCL_INPUT_PS_SIV:
- vkd3d_string_buffer_printf(buffer, " ");
- shader_dump_interpolation_mode(compiler, ins->flags);
+ shader_print_interpolation_mode(compiler, " ", ins->flags, "");
shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, "");
- shader_addline(buffer, ", ");
- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic);
+ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, "");
break;
case VKD3DSIH_DCL_INPUT:
@@ -1935,8 +1948,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
case VKD3DSIH_DCL_INPUT_PRIMITIVE:
case VKD3DSIH_DCL_OUTPUT_TOPOLOGY:
- vkd3d_string_buffer_printf(buffer, " ");
- shader_dump_primitive_type(compiler, &ins->declaration.primitive_type);
+ shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, "");
break;
case VKD3DSIH_DCL_INTERFACE:
@@ -1958,10 +1970,8 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler,
break;
case VKD3DSIH_DCL_SAMPLER:
- vkd3d_string_buffer_printf(buffer, " ");
- shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true);
- if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE)
- shader_addline(buffer, ", comparisonMode");
+ shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true,
+ ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : "");
shader_dump_register_space(compiler, ins->declaration.sampler.range.space);
break;
@@ -2354,6 +2364,10 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program,
break;
case VKD3DSIH_LABEL:
+ case VKD3DSIH_HS_DECLS:
+ case VKD3DSIH_HS_CONTROL_POINT_PHASE:
+ case VKD3DSIH_HS_FORK_PHASE:
+ case VKD3DSIH_HS_JOIN_PHASE:
indent = 0;
break;
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index 099729fbb6c..4685afa082d 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -1522,63 +1522,94 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type)
{
- switch (type->base_type)
+ switch (type->class)
{
- case HLSL_TYPE_BOOL:
- return D3DXPT_BOOL;
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_HALF:
- return D3DXPT_FLOAT;
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- return D3DXPT_INT;
- case HLSL_TYPE_PIXELSHADER:
- return D3DXPT_PIXELSHADER;
- case HLSL_TYPE_SAMPLER:
- switch (type->sampler_dim)
+ case HLSL_CLASS_SCALAR:
+ case HLSL_CLASS_VECTOR:
+ case HLSL_CLASS_MATRIX:
+ switch (type->base_type)
{
- case HLSL_SAMPLER_DIM_1D:
- return D3DXPT_SAMPLER1D;
- case HLSL_SAMPLER_DIM_2D:
- return D3DXPT_SAMPLER2D;
- case HLSL_SAMPLER_DIM_3D:
- return D3DXPT_SAMPLER3D;
- case HLSL_SAMPLER_DIM_CUBE:
- return D3DXPT_SAMPLERCUBE;
- case HLSL_SAMPLER_DIM_GENERIC:
- return D3DXPT_SAMPLER;
+ case HLSL_TYPE_BOOL:
+ return D3DXPT_BOOL;
+ /* Actually double behaves differently depending on DLL version:
+ * For <= 36, it maps to D3DXPT_FLOAT.
+ * For 37-40, it maps to zero (D3DXPT_VOID).
+ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_*
+ * values are mostly compatible with D3DXPT_*).
+ * However, the latter two cases look like bugs, and a reasonable
+ * application certainly wouldn't know what to do with them.
+ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */
+ case HLSL_TYPE_DOUBLE:
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_HALF:
+ return D3DXPT_FLOAT;
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ return D3DXPT_INT;
default:
- ERR("Invalid dimension %#x.\n", type->sampler_dim);
vkd3d_unreachable();
}
- break;
- case HLSL_TYPE_STRING:
- return D3DXPT_STRING;
- case HLSL_TYPE_TEXTURE:
- switch (type->sampler_dim)
+
+ case HLSL_CLASS_OBJECT:
+ switch (type->base_type)
{
- case HLSL_SAMPLER_DIM_1D:
- return D3DXPT_TEXTURE1D;
- case HLSL_SAMPLER_DIM_2D:
- return D3DXPT_TEXTURE2D;
- case HLSL_SAMPLER_DIM_3D:
- return D3DXPT_TEXTURE3D;
- case HLSL_SAMPLER_DIM_CUBE:
- return D3DXPT_TEXTURECUBE;
- case HLSL_SAMPLER_DIM_GENERIC:
- return D3DXPT_TEXTURE;
+ case HLSL_TYPE_PIXELSHADER:
+ return D3DXPT_PIXELSHADER;
+ case HLSL_TYPE_SAMPLER:
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ return D3DXPT_SAMPLER1D;
+ case HLSL_SAMPLER_DIM_2D:
+ return D3DXPT_SAMPLER2D;
+ case HLSL_SAMPLER_DIM_3D:
+ return D3DXPT_SAMPLER3D;
+ case HLSL_SAMPLER_DIM_CUBE:
+ return D3DXPT_SAMPLERCUBE;
+ case HLSL_SAMPLER_DIM_GENERIC:
+ return D3DXPT_SAMPLER;
+ default:
+ ERR("Invalid dimension %#x.\n", type->sampler_dim);
+ vkd3d_unreachable();
+ }
+ break;
+ case HLSL_TYPE_STRING:
+ return D3DXPT_STRING;
+ case HLSL_TYPE_TEXTURE:
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ return D3DXPT_TEXTURE1D;
+ case HLSL_SAMPLER_DIM_2D:
+ return D3DXPT_TEXTURE2D;
+ case HLSL_SAMPLER_DIM_3D:
+ return D3DXPT_TEXTURE3D;
+ case HLSL_SAMPLER_DIM_CUBE:
+ return D3DXPT_TEXTURECUBE;
+ case HLSL_SAMPLER_DIM_GENERIC:
+ return D3DXPT_TEXTURE;
+ default:
+ ERR("Invalid dimension %#x.\n", type->sampler_dim);
+ vkd3d_unreachable();
+ }
+ break;
+ case HLSL_TYPE_VERTEXSHADER:
+ return D3DXPT_VERTEXSHADER;
+ case HLSL_TYPE_VOID:
+ return D3DXPT_VOID;
default:
- ERR("Invalid dimension %#x.\n", type->sampler_dim);
vkd3d_unreachable();
}
- break;
- case HLSL_TYPE_VERTEXSHADER:
- return D3DXPT_VERTEXSHADER;
- case HLSL_TYPE_VOID:
- return D3DXPT_VOID;
- default:
vkd3d_unreachable();
+
+ case HLSL_CLASS_ARRAY:
+ return hlsl_sm1_base_type(type->e.array.type);
+
+ case HLSL_CLASS_STRUCT:
+ return D3DXPT_VOID;
}
+
+ vkd3d_unreachable();
}
static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start)
@@ -2572,19 +2603,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
{
if (instr->data_type)
{
- if (instr->data_type->class == HLSL_CLASS_MATRIX)
+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
{
- /* These need to be lowered. */
- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression.");
- continue;
- }
- else if (instr->data_type->class == HLSL_CLASS_OBJECT)
- {
- hlsl_fixme(ctx, &instr->loc, "Object copy.");
+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
break;
}
-
- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR);
}
switch (instr->type)
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index b5a61d99d3f..da8ba662dbc 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -404,6 +404,7 @@ enum dx_intrinsic_opcode
DX_ATOMIC_BINOP = 78,
DX_ATOMIC_CMP_XCHG = 79,
DX_BARRIER = 80,
+ DX_CALCULATE_LOD = 81,
DX_DISCARD = 82,
DX_DERIV_COARSEX = 83,
DX_DERIV_COARSEY = 84,
@@ -2885,6 +2886,122 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co
return VKD3D_OK;
}
+static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record,
+ struct sm6_value *dst)
+{
+ const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type;
+ struct sm6_value *operands[3];
+ unsigned int i, j, offset;
+ uint64_t value;
+
+ i = 0;
+ pointee_type = (record->operand_count & 1) ? sm6_parser_get_type(sm6, record->operands[i++]) : NULL;
+
+ if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6))
+ return VKD3D_ERROR_INVALID_SHADER;
+
+ for (j = 0; i < record->operand_count; i += 2, ++j)
+ {
+ if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i])))
+ return VKD3D_ERROR_INVALID_SHADER;
+
+ if ((value = record->operands[i + 1]) >= sm6->cur_max_value)
+ {
+ WARN("Invalid value index %"PRIu64".\n", value);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Invalid value index %"PRIu64".", value);
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+ else if (value == sm6->value_count)
+ {
+ WARN("Invalid value self-reference at %"PRIu64".\n", value);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Invalid value self-reference for a constexpr GEP.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ operands[j] = &sm6->values[value];
+ if (value > sm6->value_count)
+ {
+ operands[j]->type = elem_type;
+ }
+ else if (operands[j]->type != elem_type)
+ {
+ WARN("Type mismatch.\n");
+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH,
+ "Type mismatch in constexpr GEP elements.");
+ }
+ }
+
+ if (operands[0]->u.reg.idx_count > 1)
+ {
+ WARN("Unsupported stacked GEP.\n");
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "A GEP instruction on the result of a previous GEP is unsupported.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ if (!sm6_value_is_constant_zero(operands[1]))
+ {
+ WARN("Expected constant zero.\n");
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "The pointer dereference index for a constexpr GEP instruction is not constant zero.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+ if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type))
+ {
+ WARN("Element index is not constant int.\n");
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "A constexpr GEP element index is not a constant integer.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ dst->structure_stride = operands[0]->structure_stride;
+
+ ptr_type = operands[0]->type;
+ if (!sm6_type_is_pointer(ptr_type))
+ {
+ WARN("Constexpr GEP base value is not a pointer.\n");
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "A constexpr GEP base value is not a pointer.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ if (!pointee_type)
+ {
+ pointee_type = ptr_type->u.pointer.type;
+ }
+ else if (pointee_type != ptr_type->u.pointer.type)
+ {
+ WARN("Explicit pointee type mismatch.\n");
+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH,
+ "Explicit pointee type for constexpr GEP does not match the element type.");
+ }
+
+ offset = sm6_value_get_constant_uint(operands[2]);
+ if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset)))
+ {
+ WARN("Failed to get element type.\n");
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Failed to get the element type of a constexpr GEP.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+
+ if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6)))
+ {
+ WARN("Failed to get pointer type for type %u.\n", gep_type->class);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE,
+ "Module does not define a pointer type for a constexpr GEP result.");
+ return VKD3D_ERROR_INVALID_SHADER;
+ }
+ dst->u.reg = operands[0]->u.reg;
+ dst->u.reg.idx[1].offset = offset;
+ dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP;
+ dst->u.reg.idx_count = 2;
+
+ return VKD3D_OK;
+}
+
static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block)
{
enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID;
@@ -3005,6 +3122,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
break;
+ case CST_CODE_CE_GEP:
+ case CST_CODE_CE_INBOUNDS_GEP:
+ if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0)
+ return ret;
+ break;
+
case CST_CODE_UNDEF:
dxil_record_validate_operand_max_count(record, 0, sm6);
dst->u.reg.type = VKD3DSPR_UNDEF;
@@ -4364,6 +4487,40 @@ static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enu
instruction_dst_param_init_ssa_scalar(ins, sm6);
}
+static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
+ const struct sm6_value **operands, struct function_emission_state *state)
+{
+ const struct sm6_value *resource, *sampler;
+ struct vkd3d_shader_src_param *src_params;
+ struct vkd3d_shader_instruction *ins;
+ struct vkd3d_shader_register coord;
+ unsigned int clamp;
+
+ resource = operands[0];
+ sampler = operands[1];
+ if (!sm6_value_validate_is_texture_handle(resource, op, sm6)
+ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6))
+ {
+ return;
+ }
+
+ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord))
+ return;
+
+ clamp = sm6_value_get_constant_uint(operands[5]);
+
+ ins = state->ins;
+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD);
+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6)))
+ return;
+ src_param_init_vector_from_reg(&src_params[0], &coord);
+ src_params[1].reg = resource->u.handle.reg;
+ src_param_init_scalar(&src_params[1], !clamp);
+ src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg);
+
+ instruction_dst_param_init_ssa_scalar(ins, sm6);
+}
+
static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
const struct sm6_value **operands, struct function_emission_state *state)
{
@@ -5392,6 +5549,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] =
[DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load},
[DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store},
[DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter},
+ [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod},
[DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load},
[DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos},
[DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary},
diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
index 466908cd82b..1d90cd70e03 100644
--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
@@ -115,6 +115,9 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx)
static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx)
{
+ if (var->state_block_count)
+ hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments.");
+
fx->ops->write_pass(var, fx);
}
@@ -401,14 +404,6 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
uint32_t name_offset, offset, size, stride, numeric_desc;
uint32_t elements_count = 0;
const char *name;
- static const uint32_t variable_type[] =
- {
- [HLSL_CLASS_SCALAR] = 1,
- [HLSL_CLASS_VECTOR] = 1,
- [HLSL_CLASS_MATRIX] = 1,
- [HLSL_CLASS_OBJECT] = 2,
- [HLSL_CLASS_STRUCT] = 3,
- };
struct hlsl_ctx *ctx = fx->ctx;
/* Resolve arrays to element type and number of elements. */
@@ -428,13 +423,19 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
case HLSL_CLASS_SCALAR:
case HLSL_CLASS_VECTOR:
case HLSL_CLASS_MATRIX:
+ put_u32_unaligned(buffer, 1);
+ break;
+
case HLSL_CLASS_OBJECT:
+ put_u32_unaligned(buffer, 2);
+ break;
+
case HLSL_CLASS_STRUCT:
- put_u32_unaligned(buffer, variable_type[type->class]);
+ put_u32_unaligned(buffer, 3);
break;
- default:
- hlsl_fixme(ctx, &ctx->location, "Writing type class %u is not implemented.", type->class);
- return 0;
+
+ case HLSL_CLASS_ARRAY:
+ vkd3d_unreachable();
}
size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float);
@@ -630,7 +631,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
{
struct vkd3d_bytecode_buffer *buffer = &fx->unstructured;
uint32_t semantic_offset, offset, elements_count = 0, name_offset;
- struct hlsl_ctx *ctx = fx->ctx;
size_t i;
/* Resolve arrays to element type and number of elements. */
@@ -643,22 +643,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
name_offset = write_string(name, fx);
semantic_offset = write_string(semantic->name, fx);
- switch (type->base_type)
- {
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- case HLSL_TYPE_VOID:
- case HLSL_TYPE_TEXTURE:
- break;
- default:
- hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.",
- type->base_type);
- return 0;
- };
-
offset = put_u32(buffer, hlsl_sm1_base_type(type));
put_u32(buffer, hlsl_sm1_class(type));
put_u32(buffer, name_offset);
@@ -688,6 +672,9 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n
for (i = 0; i < type->e.record.field_count; ++i)
{
const struct hlsl_struct_field *field = &type->e.record.fields[i];
+
+ /* Validated in check_invalid_object_fields(). */
+ assert(hlsl_is_numeric_type(field->type));
write_fx_2_parameter(field->type, field->name, &field->semantic, fx);
}
}
@@ -746,7 +733,7 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f
{
struct vkd3d_bytecode_buffer *buffer = &fx->unstructured;
const struct hlsl_type *type = var->data_type;
- uint32_t i, offset, size, elements_count = 1;
+ uint32_t offset, size, elements_count = 1;
size = get_fx_2_type_size(type);
@@ -756,63 +743,80 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f
type = hlsl_get_multiarray_element_type(type);
}
- if (type->class == HLSL_CLASS_OBJECT)
- {
- /* Objects are given sequential ids. */
- offset = put_u32(buffer, fx->object_variable_count++);
- for (i = 1; i < elements_count; ++i)
- put_u32(buffer, fx->object_variable_count++);
- }
- else
+ /* Note that struct fields must all be numeric;
+ * this was validated in check_invalid_object_fields(). */
+ switch (type->class)
{
- /* FIXME: write actual initial value */
- offset = put_u32(buffer, 0);
+ case HLSL_CLASS_SCALAR:
+ case HLSL_CLASS_VECTOR:
+ case HLSL_CLASS_MATRIX:
+ case HLSL_CLASS_STRUCT:
+ /* FIXME: write actual initial value */
+ offset = put_u32(buffer, 0);
- for (i = 1; i < size / sizeof(uint32_t); ++i)
- put_u32(buffer, 0);
+ for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i)
+ put_u32(buffer, 0);
+ break;
+
+ default:
+ /* Objects are given sequential ids. */
+ offset = put_u32(buffer, fx->object_variable_count++);
+ for (uint32_t i = 1; i < elements_count; ++i)
+ put_u32(buffer, fx->object_variable_count++);
+ break;
}
return offset;
}
-static bool is_type_supported_fx_2(const struct hlsl_type *type)
+static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type,
+ const struct vkd3d_shader_location *loc)
{
- type = hlsl_get_multiarray_element_type(type);
-
- if (type->class == HLSL_CLASS_STRUCT)
- return true;
-
- switch (type->base_type)
+ switch (type->class)
{
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_DOUBLE:
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_PIXELSHADER:
- case HLSL_TYPE_VERTEXSHADER:
- case HLSL_TYPE_STRING:
+ case HLSL_CLASS_STRUCT:
+ /* Note that the fields must all be numeric; this was validated in
+ * check_invalid_object_fields(). */
return true;
- case HLSL_TYPE_TEXTURE:
- case HLSL_TYPE_SAMPLER:
- switch (type->sampler_dim)
+
+ case HLSL_CLASS_SCALAR:
+ case HLSL_CLASS_VECTOR:
+ case HLSL_CLASS_MATRIX:
+ return true;
+
+ case HLSL_CLASS_ARRAY:
+ return is_type_supported_fx_2(ctx, type->e.array.type, loc);
+
+ case HLSL_CLASS_OBJECT:
+ switch (type->base_type)
{
- case HLSL_SAMPLER_DIM_1D:
- case HLSL_SAMPLER_DIM_2D:
- case HLSL_SAMPLER_DIM_3D:
- case HLSL_SAMPLER_DIM_CUBE:
- case HLSL_SAMPLER_DIM_GENERIC:
- return true;
+ case HLSL_TYPE_TEXTURE:
+ switch (type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_1D:
+ case HLSL_SAMPLER_DIM_2D:
+ case HLSL_SAMPLER_DIM_3D:
+ case HLSL_SAMPLER_DIM_CUBE:
+ case HLSL_SAMPLER_DIM_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+ break;
+
+ case HLSL_TYPE_SAMPLER:
+ case HLSL_TYPE_STRING:
+ case HLSL_TYPE_PIXELSHADER:
+ case HLSL_TYPE_VERTEXSHADER:
+ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type);
+ return false;
+
default:
- ;
+ return false;
}
- break;
- default:
- return false;
}
- return false;
+ vkd3d_unreachable();
}
static void write_fx_2_parameters(struct fx_write_context *fx)
@@ -828,7 +832,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
{
- if (!is_type_supported_fx_2(var->data_type))
+ if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc))
continue;
desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 5638a03a8f5..ed80e2b75c8 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -134,14 +134,39 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name)
return hlsl_get_var(scope->upper, name);
}
-void hlsl_free_var(struct hlsl_ir_var *decl)
+static void free_state_block_entry(struct hlsl_state_block_entry *entry)
+{
+ vkd3d_free(entry->name);
+ vkd3d_free(entry->args);
+ hlsl_block_cleanup(entry->instrs);
+ vkd3d_free(entry->instrs);
+ vkd3d_free(entry);
+}
+
+void hlsl_free_state_block(struct hlsl_state_block *state_block)
{
unsigned int k;
+ assert(state_block);
+ for (k = 0; k < state_block->count; ++k)
+ free_state_block_entry(state_block->entries[k]);
+ vkd3d_free(state_block->entries);
+ vkd3d_free(state_block);
+}
+
+void hlsl_free_var(struct hlsl_ir_var *decl)
+{
+ unsigned int k, i;
+
vkd3d_free((void *)decl->name);
hlsl_cleanup_semantic(&decl->semantic);
for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k)
vkd3d_free((void *)decl->objects_usage[k]);
+
+ for (i = 0; i < decl->state_block_count; ++i)
+ hlsl_free_state_block(decl->state_blocks[i]);
+ vkd3d_free(decl->state_blocks);
+
vkd3d_free(decl);
}
@@ -1561,6 +1586,27 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned
return &swizzle->node;
}
+struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name,
+ struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_stateblock_constant *constant;
+ struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT);
+
+ if (!(constant = hlsl_alloc(ctx, sizeof(*constant))))
+ return NULL;
+
+ init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc);
+
+ if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1)))
+ {
+ vkd3d_free(constant);
+ return NULL;
+ }
+ strcpy(constant->name, name);
+
+ return &constant->node;
+}
+
bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
{
struct hlsl_type *type = index->val.node->data_type;
@@ -1570,7 +1616,10 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
bool hlsl_index_is_resource_access(struct hlsl_ir_index *index)
{
- return index->val.node->data_type->class == HLSL_CLASS_OBJECT;
+ const struct hlsl_type *type = index->val.node->data_type;
+
+ return type->class == HLSL_CLASS_OBJECT
+ && (type->base_type == HLSL_TYPE_TEXTURE || type->base_type == HLSL_TYPE_UAV);
}
bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index)
@@ -1881,6 +1930,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr
return dst;
}
+static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx,
+ struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant)
+{
+ return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc);
+}
+
void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c)
{
hlsl_block_cleanup(&c->body);
@@ -1976,6 +2031,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_SWIZZLE:
return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
+
+ case HLSL_IR_STATEBLOCK_CONSTANT:
+ return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr));
}
vkd3d_unreachable();
@@ -2631,7 +2689,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP3_CMP] = "cmp",
[HLSL_OP3_DP2ADD] = "dp2add",
- [HLSL_OP3_MOVC] = "movc",
[HLSL_OP3_TERNARY] = "ternary",
};
@@ -2808,6 +2865,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_
vkd3d_string_buffer_printf(buffer, "]");
}
+static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer,
+ const struct hlsl_ir_stateblock_constant *constant)
+{
+ vkd3d_string_buffer_printf(buffer, "%s", constant->name);
+}
+
static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s)
{
struct hlsl_ir_switch_case *c;
@@ -2896,6 +2959,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
case HLSL_IR_SWIZZLE:
dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr));
break;
+
+ case HLSL_IR_STATEBLOCK_CONSTANT:
+ dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr));
+ break;
}
}
@@ -3068,6 +3135,12 @@ static void free_ir_index(struct hlsl_ir_index *index)
vkd3d_free(index);
}
+static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant)
+{
+ vkd3d_free(constant->name);
+ vkd3d_free(constant);
+}
+
void hlsl_free_instr(struct hlsl_ir_node *node)
{
assert(list_empty(&node->uses));
@@ -3125,6 +3198,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
case HLSL_IR_SWITCH:
free_ir_switch(hlsl_ir_switch(node));
break;
+
+ case HLSL_IR_STATEBLOCK_CONSTANT:
+ free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node));
+ break;
}
}
@@ -3290,7 +3367,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target)
{"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false},
{"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false},
{"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false},
+ {"cs_5_1", VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false},
{"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false},
+ {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false},
{"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false},
{"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false},
{"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false},
@@ -3298,7 +3377,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target)
{"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false},
{"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false},
{"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false},
+ {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false},
{"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false},
+ {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false},
{"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false},
{"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false},
{"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false},
@@ -3326,6 +3407,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target)
{"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false},
{"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false},
{"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false},
+ {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false},
{"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false},
{"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false},
{"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false},
@@ -3347,6 +3429,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target)
{"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false},
{"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false},
{"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false},
+ {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false},
};
for (i = 0; i < ARRAY_SIZE(profiles); ++i)
@@ -3651,6 +3734,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx)
rb_destroy(&ctx->functions, free_function_rb, NULL);
+ /* State blocks must be free before the variables, because they contain instructions that may
+ * refer to them. */
+ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry)
+ {
+ LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry)
+ {
+ for (i = 0; i < var->state_block_count; ++i)
+ hlsl_free_state_block(var->state_blocks[i]);
+ vkd3d_free(var->state_blocks);
+ var->state_blocks = NULL;
+ var->state_block_count = 0;
+ var->state_block_capacity = 0;
+ }
+ }
+
LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry)
{
LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry)
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index aa9cb14fc8d..4225098bc87 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -298,6 +298,7 @@ enum hlsl_ir_node_type
HLSL_IR_STORE,
HLSL_IR_SWIZZLE,
HLSL_IR_SWITCH,
+ HLSL_IR_STATEBLOCK_CONSTANT,
};
/* Common data for every type of IR instruction node. */
@@ -423,6 +424,14 @@ struct hlsl_ir_var
/* Scope that contains annotations for this variable. */
struct hlsl_scope *annotations;
+ /* A dynamic array containing the state block on the variable's declaration, if any.
+ * An array variable may contain multiple state blocks.
+ * A technique pass will always contain one.
+ * These are only really used for effect profiles. */
+ struct hlsl_state_block **state_blocks;
+ unsigned int state_block_count;
+ size_t state_block_capacity;
+
/* Indexes of the IR instructions where the variable is first written and last read (liveness
* range). The IR instructions are numerated starting from 2, because 0 means unused, and 1
* means function entry. */
@@ -458,6 +467,38 @@ struct hlsl_ir_var
uint32_t is_separated_resource : 1;
};
+/* This struct is used to represent assignments in state block entries:
+ * name = {args[0], args[1], ...};
+ * - or -
+ * name = args[0]
+ * - or -
+ * name[lhs_index] = args[0]
+ * - or -
+ * name[lhs_index] = {args[0], args[1], ...};
+ */
+struct hlsl_state_block_entry
+{
+ /* For assignments, the name in the lhs. */
+ char *name;
+
+ /* Whether the lhs in the assignment is indexed and, in that case, its index. */
+ bool lhs_has_index;
+ unsigned int lhs_index;
+
+ /* Instructions present in the rhs. */
+ struct hlsl_block *instrs;
+
+ /* For assignments, arguments of the rhs initializer. */
+ struct hlsl_ir_node **args;
+ unsigned int args_count;
+};
+
+struct hlsl_state_block
+{
+ struct hlsl_state_block_entry **entries;
+ size_t count, capacity;
+};
+
/* Sized array of variables representing a function's parameters. */
struct hlsl_func_parameters
{
@@ -601,14 +642,9 @@ enum hlsl_ir_expr_op
/* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy,
* then adds c. */
HLSL_OP3_DP2ADD,
- /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise.
- * TERNARY(a, b, c) returns c if a == 0 and b otherwise.
- * They differ for floating point numbers, because
- * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b
- if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while
- SM4+ is using MOVC in such cases. */
+ /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean.
+ * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */
HLSL_OP3_CMP,
- HLSL_OP3_MOVC,
HLSL_OP3_TERNARY,
};
@@ -754,6 +790,14 @@ struct hlsl_ir_constant
struct hlsl_reg reg;
};
+/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions,
+ * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */
+struct hlsl_ir_stateblock_constant
+{
+ struct hlsl_ir_node node;
+ char *name;
+};
+
struct hlsl_scope
{
/* Item entry for hlsl_ctx.scopes. */
@@ -932,6 +976,16 @@ struct hlsl_ctx
bool warn_implicit_truncation;
};
+static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor)
+{
+ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor);
+}
+
+static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor)
+{
+ return !hlsl_version_ge(ctx, major, minor);
+}
+
struct hlsl_resource_load_params
{
struct hlsl_type *format;
@@ -1019,6 +1073,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n
return CONTAINING_RECORD(node, struct hlsl_ir_switch, node);
}
+static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node)
+{
+ assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT);
+ return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node);
+}
+
static inline void hlsl_block_init(struct hlsl_block *block)
{
list_init(&block->instrs);
@@ -1211,6 +1271,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new);
void hlsl_free_attribute(struct hlsl_attribute *attr);
void hlsl_free_instr(struct hlsl_ir_node *node);
void hlsl_free_instr_list(struct list *list);
+void hlsl_free_state_block(struct hlsl_state_block *state_block);
void hlsl_free_type(struct hlsl_type *type);
void hlsl_free_var(struct hlsl_ir_var *decl);
@@ -1292,6 +1353,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
struct hlsl_struct_field *fields, size_t field_count);
struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components,
struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name,
+ struct vkd3d_shader_location *loc);
struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template,
struct hlsl_type *type, const struct vkd3d_shader_location *loc);
struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name,
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index e02e0c540f9..8f71556757a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -77,6 +77,10 @@ struct parse_variable_def
struct hlsl_type *basic_type;
uint32_t modifiers;
struct vkd3d_shader_location modifiers_loc;
+
+ struct hlsl_state_block **state_blocks;
+ unsigned int state_block_count;
+ size_t state_block_capacity;
};
struct parse_function
@@ -114,6 +118,12 @@ struct parse_attribute_list
const struct hlsl_attribute **attrs;
};
+struct state_block_index
+{
+ bool has_index;
+ unsigned int index;
+};
+
}
%code provides
@@ -931,24 +941,10 @@ static void free_parse_variable_def(struct parse_variable_def *v)
vkd3d_free(v->arrays.sizes);
vkd3d_free(v->name);
hlsl_cleanup_semantic(&v->semantic);
+ assert(!v->state_blocks);
vkd3d_free(v);
}
-static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx)
-{
- return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1;
-}
-
-static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor)
-{
- return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor);
-}
-
-static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor)
-{
- return !shader_profile_version_ge(ctx, major, minor);
-}
-
static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields,
struct hlsl_type *type, uint32_t modifiers, struct list *defs)
{
@@ -971,7 +967,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields,
field->type = type;
- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT)
+ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type))
{
for (k = 0; k < v->arrays.count; ++k)
unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT);
@@ -1121,7 +1117,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters
}
static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations,
- const struct vkd3d_shader_location *loc)
+ struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_var *var;
struct hlsl_type *type;
@@ -1131,6 +1127,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *
return false;
var->annotations = annotations;
+ var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks));
+ var->state_blocks[0] = state_block;
+ var->state_block_count = 1;
+ var->state_block_capacity = 1;
+
if (!hlsl_add_var(ctx, var, false))
{
struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name);
@@ -1216,7 +1217,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const
struct hlsl_reg_reservation reservation = {0};
char *endptr;
- if (shader_profile_version_lt(ctx, 4, 0))
+ if (hlsl_version_lt(ctx, 4, 0))
return reservation;
reservation.offset_index = strtoul(reg_string + 1, &endptr, 10);
@@ -1299,6 +1300,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_STORE:
case HLSL_IR_SWITCH:
+ case HLSL_IR_STATEBLOCK_CONSTANT:
hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
"Expected literal expression.");
}
@@ -2177,7 +2179,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v)
type = basic_type;
- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT)
+ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type))
{
for (i = 0; i < v->arrays.count; ++i)
unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT);
@@ -2362,8 +2364,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var
free_parse_variable_def(v);
continue;
}
+
type = var->data_type;
+ var->state_blocks = v->state_blocks;
+ var->state_block_count = v->state_block_count;
+ var->state_block_capacity = v->state_block_capacity;
+ v->state_block_count = 0;
+ v->state_block_capacity = 0;
+ v->state_blocks = NULL;
+
+ if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count)
+ {
+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
+ "Expected %u state blocks, but got %u.",
+ hlsl_type_component_count(type), var->state_block_count);
+ free_parse_variable_def(v);
+ continue;
+ }
+
if (v->initializer.args_count)
{
if (v->initializer.braces)
@@ -2663,12 +2682,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx,
static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
+ enum hlsl_base_type base_type;
struct hlsl_type *type;
if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
return false;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+ base_type = type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT;
+ type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy);
return convert_args(ctx, params, type, loc);
}
@@ -2728,81 +2749,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx,
return write_acos_or_asin(ctx, params, loc, false);
}
-static bool intrinsic_all(struct hlsl_ctx *ctx,
- const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+/* Find the type corresponding to the given source type, with the same
+ * dimensions but a different base type. */
+static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx,
+ const struct hlsl_type *type, enum hlsl_base_type base_type)
{
- struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load;
+ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy);
+}
+
+static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params,
+ struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_node *res, *load;
unsigned int i, count;
- if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc)))
- return false;
- hlsl_block_add_instr(params->instrs, one);
+ count = hlsl_type_component_count(arg->data_type);
- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc)))
+ if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc)))
return false;
- hlsl_block_add_instr(params->instrs, zero);
- mul = one;
-
- count = hlsl_type_component_count(arg->data_type);
- for (i = 0; i < count; ++i)
+ for (i = 1; i < count; ++i)
{
if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc)))
return false;
- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc)))
- return false;
+ if (!(res = hlsl_new_binary_expr(ctx, op, res, load)))
+ return NULL;
+ hlsl_block_add_instr(params->instrs, res);
}
- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc);
+ return true;
}
-static bool intrinsic_any(struct hlsl_ctx *ctx,
+static bool intrinsic_all(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
- struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load;
- unsigned int i, count;
+ struct hlsl_ir_node *arg = params->args[0], *cast;
+ struct hlsl_type *bool_type;
- if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR)
- {
- hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar");
+ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL);
+ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc)))
return false;
- }
-
- if (arg->data_type->base_type == HLSL_TYPE_FLOAT)
- {
- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc)))
- return false;
- hlsl_block_add_instr(params->instrs, zero);
-
- if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc)))
- return false;
- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc);
- }
- else if (arg->data_type->base_type == HLSL_TYPE_BOOL)
- {
- if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc)))
- return false;
- hlsl_block_add_instr(params->instrs, bfalse);
-
- or = bfalse;
-
- count = hlsl_type_component_count(arg->data_type);
- for (i = 0; i < count; ++i)
- {
- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc)))
- return false;
+ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc);
+}
- if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc)))
- return false;
- }
+static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params,
+ const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_node *arg = params->args[0], *cast;
+ struct hlsl_type *bool_type;
- return true;
- }
+ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL);
+ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc)))
+ return false;
- hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool");
- return false;
+ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc);
}
static bool intrinsic_asin(struct hlsl_ctx *ctx,
@@ -2870,20 +2872,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx,
type->name, type->name, type->name);
if (ret < 0)
{
- vkd3d_string_buffer_cleanup(buf);
+ hlsl_release_string_buffer(ctx, buf);
return false;
}
ret = vkd3d_string_buffer_printf(buf, body_template, type->name);
if (ret < 0)
{
- vkd3d_string_buffer_cleanup(buf);
+ hlsl_release_string_buffer(ctx, buf);
return false;
}
func = hlsl_compile_internal_function(ctx,
atan2_mode ? atan2_name : atan_name, buf->buffer);
- vkd3d_string_buffer_cleanup(buf);
+ hlsl_release_string_buffer(ctx, buf);
if (!func)
return false;
@@ -2903,15 +2905,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx,
return write_atan_or_atan2(ctx, params, loc, true);
}
-
-/* Find the type corresponding to the given source type, with the same
- * dimensions but a different base type. */
-static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx,
- const struct hlsl_type *type, enum hlsl_base_type base_type)
-{
- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy);
-}
-
static bool intrinsic_asfloat(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -3035,6 +3028,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx,
return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc);
}
+static bool write_cosh_or_sinh(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode)
+{
+ struct hlsl_ir_function_decl *func;
+ struct hlsl_ir_node *arg;
+ const char *fn_name, *type_name;
+ char *body;
+
+ static const char template[] =
+ "%s %s(%s x)\n"
+ "{\n"
+ " return (exp(x) %s exp(-x)) / 2;\n"
+ "}\n";
+ static const char fn_name_sinh[] = "sinh";
+ static const char fn_name_cosh[] = "cosh";
+
+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc)))
+ return false;
+
+ type_name = arg->data_type->name;
+ fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh;
+
+ if (!(body = hlsl_sprintf_alloc(ctx, template,
+ type_name, fn_name, type_name, sinh_mode ? "-" : "+")))
+ return false;
+
+ func = hlsl_compile_internal_function(ctx, fn_name, body);
+ vkd3d_free(body);
+ if (!func)
+ return false;
+
+ return add_user_call(ctx, func, params, loc);
+}
+
+static bool intrinsic_cosh(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return write_cosh_or_sinh(ctx, params, loc, false);
+}
+
static bool intrinsic_cross(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -3747,6 +3780,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx,
return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc);
}
+static bool intrinsic_refract(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_type *r_type = params->args[0]->data_type;
+ struct hlsl_type *n_type = params->args[1]->data_type;
+ struct hlsl_type *i_type = params->args[2]->data_type;
+ struct hlsl_type *res_type, *idx_type, *scal_type;
+ struct parse_initializer mut_params;
+ struct hlsl_ir_function_decl *func;
+ enum hlsl_base_type base;
+ char *body;
+
+ static const char template[] =
+ "%s refract(%s r, %s n, %s i)\n"
+ "{\n"
+ " %s d, t;\n"
+ " d = dot(r, n);\n"
+ " t = 1 - i.x * i.x * (1 - d * d);\n"
+ " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n"
+ "}";
+
+ if (r_type->class == HLSL_CLASS_MATRIX
+ || n_type->class == HLSL_CLASS_MATRIX
+ || i_type->class == HLSL_CLASS_MATRIX)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported.");
+ return false;
+ }
+
+ assert(params->args_count == 3);
+ mut_params = *params;
+ mut_params.args_count = 2;
+ if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc)))
+ return false;
+
+ base = expr_common_base_type(res_type->base_type, i_type->base_type);
+ base = base == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT;
+ res_type = convert_numeric_type(ctx, res_type, base);
+ idx_type = convert_numeric_type(ctx, i_type, base);
+ scal_type = hlsl_get_scalar_type(ctx, base);
+
+ if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name,
+ res_type->name, idx_type->name, scal_type->name)))
+ return false;
+
+ func = hlsl_compile_internal_function(ctx, "refract", body);
+ vkd3d_free(body);
+ if (!func)
+ return false;
+
+ return add_user_call(ctx, func, params, loc);
+}
+
static bool intrinsic_round(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -3827,6 +3913,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx,
return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc);
}
+static bool intrinsic_sinh(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ return write_cosh_or_sinh(ctx, params, loc, true);
+}
+
/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */
static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
@@ -3899,6 +3991,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx,
return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc);
}
+static bool intrinsic_tanh(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_function_decl *func;
+ struct hlsl_ir_node *arg;
+ struct hlsl_type *type;
+ char *body;
+
+ static const char template[] =
+ "%s tanh(%s x)\n"
+ "{\n"
+ " %s exp_pos, exp_neg;\n"
+ " exp_pos = exp(x);\n"
+ " exp_neg = exp(-x);\n"
+ " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n"
+ "}\n";
+
+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc)))
+ return false;
+ type = arg->data_type;
+
+ if (!(body = hlsl_sprintf_alloc(ctx, template,
+ type->name, type->name, type->name)))
+ return false;
+
+ func = hlsl_compile_internal_function(ctx, "tanh", body);
+ vkd3d_free(body);
+ if (!func)
+ return false;
+
+ return add_user_call(ctx, func, params, loc);
+}
+
static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params,
const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim)
{
@@ -3967,7 +4092,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
return false;
}
- if (shader_profile_version_ge(ctx, 4, 0))
+ if (hlsl_version_ge(ctx, 4, 0))
{
unsigned int count = hlsl_sampler_dim_count(dim);
struct hlsl_ir_node *divisor;
@@ -4014,7 +4139,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
return false;
initialize_var_components(ctx, params->instrs, var, &idx, coords);
- if (shader_profile_version_ge(ctx, 4, 0))
+ if (hlsl_version_ge(ctx, 4, 0))
{
if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc)))
return false;
@@ -4200,7 +4325,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx,
if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc)))
return false;
- if (shader_profile_version_ge(ctx, 4, 0))
+ if (hlsl_version_ge(ctx, 4, 0))
return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc);
return true;
@@ -4231,6 +4356,7 @@ intrinsic_functions[] =
{"clamp", 3, true, intrinsic_clamp},
{"clip", 1, true, intrinsic_clip},
{"cos", 1, true, intrinsic_cos},
+ {"cosh", 1, true, intrinsic_cosh},
{"cross", 2, true, intrinsic_cross},
{"ddx", 1, true, intrinsic_ddx},
{"ddx_coarse", 1, true, intrinsic_ddx_coarse},
@@ -4262,15 +4388,18 @@ intrinsic_functions[] =
{"pow", 2, true, intrinsic_pow},
{"radians", 1, true, intrinsic_radians},
{"reflect", 2, true, intrinsic_reflect},
+ {"refract", 3, true, intrinsic_refract},
{"round", 1, true, intrinsic_round},
{"rsqrt", 1, true, intrinsic_rsqrt},
{"saturate", 1, true, intrinsic_saturate},
{"sign", 1, true, intrinsic_sign},
{"sin", 1, true, intrinsic_sin},
+ {"sinh", 1, true, intrinsic_sinh},
{"smoothstep", 3, true, intrinsic_smoothstep},
{"sqrt", 1, true, intrinsic_sqrt},
{"step", 2, true, intrinsic_step},
{"tan", 1, true, intrinsic_tan},
+ {"tanh", 1, true, intrinsic_tanh},
{"tex1D", -1, false, intrinsic_tex1D},
{"tex2D", -1, false, intrinsic_tex2D},
{"tex2Dlod", 2, false, intrinsic_tex2Dlod},
@@ -4405,26 +4534,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
}
- else if (common_type->dimx == 1 && common_type->dimy == 1)
- {
- common_type = hlsl_get_numeric_type(ctx, cond_type->class,
- common_type->base_type, cond_type->dimx, cond_type->dimy);
- }
- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy)
+ else
{
- /* This condition looks wrong but is correct.
- * floatN is compatible with float1xN, but not with floatNx1. */
-
- struct vkd3d_string_buffer *cond_string, *value_string;
+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
+ cond_type->dimx, cond_type->dimy);
+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
+ return false;
- cond_string = hlsl_type_to_string(ctx, cond_type);
- value_string = hlsl_type_to_string(ctx, common_type);
- if (cond_string && value_string)
- hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
- "Ternary condition type '%s' is not compatible with value type '%s'.",
- cond_string->buffer, value_string->buffer);
- hlsl_release_string_buffer(ctx, cond_string);
- hlsl_release_string_buffer(ctx, value_string);
+ if (common_type->dimx == 1 && common_type->dimy == 1)
+ {
+ common_type = hlsl_get_numeric_type(ctx, cond_type->class,
+ common_type->base_type, cond_type->dimx, cond_type->dimy);
+ }
+ else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy)
+ {
+ /* This condition looks wrong but is correct.
+ * floatN is compatible with float1xN, but not with floatNx1. */
+
+ struct vkd3d_string_buffer *cond_string, *value_string;
+
+ cond_string = hlsl_type_to_string(ctx, cond_type);
+ value_string = hlsl_type_to_string(ctx, common_type);
+ if (cond_string && value_string)
+ hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Ternary condition type '%s' is not compatible with value type '%s'.",
+ cond_string->buffer, value_string->buffer);
+ hlsl_release_string_buffer(ctx, cond_string);
+ hlsl_release_string_buffer(ctx, value_string);
+ }
}
if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc)))
@@ -4449,9 +4586,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
hlsl_release_string_buffer(ctx, second_string);
}
+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
+ cond_type->dimx, cond_type->dimy);
+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
+ return false;
+
common_type = first->data_type;
}
+ assert(cond->data_type->base_type == HLSL_TYPE_BOOL);
+
args[0] = cond;
args[1] = first;
args[2] = second;
@@ -5280,6 +5424,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
hlsl_release_string_buffer(ctx, string);
}
+static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry)
+{
+ if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1,
+ sizeof(*state_block->entries)))
+ return false;
+
+ state_block->entries[state_block->count++] = entry;
+ return true;
+}
+
}
%locations
@@ -5320,6 +5474,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
struct parse_attribute_list attr_list;
struct hlsl_ir_switch_case *switch_case;
struct hlsl_scope *scope;
+ struct hlsl_state_block *state_block;
+ struct state_block_index state_block_index;
}
%token KW_BLENDSTATE
@@ -5519,6 +5675,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%type <name> any_identifier
%type <name> var_identifier
+%type <name> stateblock_lhs_identifier
%type <name> name_opt
%type <parameter> parameter
@@ -5533,6 +5690,10 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%type <semantic> semantic
+%type <state_block> state_block
+
+%type <state_block_index> state_block_index_opt
+
%type <switch_case> switch_case
%type <type> field_type
@@ -5543,6 +5704,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%type <type> type_no_void
%type <type> typedef_type
+%type <variable_def> state_block_list
%type <variable_def> type_spec
%type <variable_def> variable_decl
%type <variable_def> variable_def
@@ -5573,9 +5735,9 @@ name_opt:
| any_identifier
pass:
- KW_PASS name_opt annotations_opt '{' '}'
+ KW_PASS name_opt annotations_opt '{' state_block_start state_block '}'
{
- if (!add_pass(ctx, $2, $3, &@1))
+ if (!add_pass(ctx, $2, $3, $6, &@1))
YYABORT;
}
@@ -6474,7 +6636,7 @@ type_no_void:
{
validate_texture_format_type(ctx, $3, &@3);
- if (shader_profile_version_lt(ctx, 4, 1))
+ if (hlsl_version_lt(ctx, 4, 1))
{
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name);
@@ -6513,7 +6675,7 @@ type_no_void:
$$ = hlsl_get_type(ctx->cur_scope, $1, true, true);
if ($$->is_minimum_precision)
{
- if (shader_profile_version_lt(ctx, 4, 0))
+ if (hlsl_version_lt(ctx, 4, 0))
{
hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Target profile doesn't support minimum-precision types.");
@@ -6677,22 +6839,91 @@ variable_decl:
$$->reg_reservation = $3.reg_reservation;
}
-state:
- any_identifier '=' expr ';'
+state_block_start:
+ %empty
{
- vkd3d_free($1);
- destroy_block($3);
+ ctx->in_state_block = 1;
}
-state_block_start:
+stateblock_lhs_identifier:
+ any_identifier
+ {
+ $$ = $1;
+ }
+ | KW_PIXELSHADER
+ {
+ if (!($$ = hlsl_strdup(ctx, "pixelshader")))
+ YYABORT;
+ }
+ | KW_VERTEXSHADER
+ {
+ if (!($$ = hlsl_strdup(ctx, "vertexshader")))
+ YYABORT;
+ }
+
+state_block_index_opt:
%empty
{
- ctx->in_state_block = 1;
+ $$.has_index = false;
+ $$.index = 0;
}
+ | '[' C_INTEGER ']'
+ {
+ if ($2 < 0)
+ {
+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX,
+ "State block array index is not a positive integer constant.");
+ YYABORT;
+ }
+ $$.has_index = true;
+ $$.index = $2;
+ }
state_block:
%empty
- | state_block state
+ {
+ if (!($$ = hlsl_alloc(ctx, sizeof(*$$))))
+ YYABORT;
+ }
+ | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';'
+ {
+ struct hlsl_state_block_entry *entry;
+
+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry))))
+ YYABORT;
+
+ entry->name = $2;
+ entry->lhs_has_index = $3.has_index;
+ entry->lhs_index = $3.index;
+
+ entry->instrs = $5.instrs;
+ entry->args = $5.args;
+ entry->args_count = $5.args_count;
+
+ $$ = $1;
+ state_block_add_entry($$, entry);
+ }
+
+state_block_list:
+ '{' state_block '}'
+ {
+ if (!($$ = hlsl_alloc(ctx, sizeof(*$$))))
+ YYABORT;
+
+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity,
+ $$->state_block_count + 1, sizeof(*$$->state_blocks))))
+ YYABORT;
+ $$->state_blocks[$$->state_block_count++] = $2;
+ }
+ | state_block_list ',' '{' state_block '}'
+ {
+ $$ = $1;
+
+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity,
+ $$->state_block_count + 1, sizeof(*$$->state_blocks))))
+ YYABORT;
+ $$->state_blocks[$$->state_block_count++] = $4;
+ }
variable_def:
variable_decl
@@ -6705,6 +6936,24 @@ variable_def:
{
$$ = $1;
ctx->in_state_block = 0;
+
+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity,
+ $$->state_block_count + 1, sizeof(*$$->state_blocks))))
+ YYABORT;
+ $$->state_blocks[$$->state_block_count++] = $4;
+ }
+ | variable_decl '{' state_block_start state_block_list '}'
+ {
+ $$ = $1;
+ ctx->in_state_block = 0;
+
+ $$->state_blocks = $4->state_blocks;
+ $$->state_block_count = $4->state_block_count;
+ $$->state_block_capacity = $4->state_block_capacity;
+ $4->state_blocks = NULL;
+ $4->state_block_count = 0;
+ $4->state_block_capacity = 0;
+ free_parse_variable_def($4);
}
variable_def_typed:
@@ -7330,15 +7579,13 @@ primary_expr:
{
if (ctx->in_state_block)
{
- struct hlsl_ir_load *load;
- struct hlsl_ir_var *var;
+ struct hlsl_ir_node *constant;
- if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr",
- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1)))
+ if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1)))
YYABORT;
- if (!(load = hlsl_new_var_load(ctx, var, &@1)))
- YYABORT;
- if (!($$ = make_block(ctx, &load->node)))
+ vkd3d_free($1);
+
+ if (!($$ = make_block(ctx, constant)))
YYABORT;
}
else
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 6f2de93767b..eaa72836d8a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -1565,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx,
var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count),
new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count));
- if (instr->data_type->class != HLSL_CLASS_OBJECT)
+ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR)
{
struct hlsl_ir_node *swizzle_node;
@@ -1742,7 +1742,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s
{
unsigned int writemask = store->writemask;
- if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT)
+ if (!hlsl_is_numeric_type(store->rhs.node->data_type))
writemask = VKD3DSP_WRITEMASK_0;
copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index);
}
@@ -2954,12 +2954,11 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st
return true;
}
-/* Use movc/cmp for the ternary operator. */
+/* Lower TERNARY to CMP for SM1. */
static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement;
- struct hlsl_ir_node *zero, *cond, *first, *second;
- struct hlsl_constant_value zero_value = { 0 };
+ struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg;
struct hlsl_ir_expr *expr;
struct hlsl_type *type;
@@ -2980,48 +2979,25 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
return false;
}
- if (ctx->profile->major_version < 4)
- {
- struct hlsl_ir_node *abs, *neg;
+ assert(cond->data_type->base_type == HLSL_TYPE_BOOL);
- if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, abs);
+ type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT,
+ instr->data_type->dimx, instr->data_type->dimy);
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, neg);
-
- operands[0] = neg;
- operands[1] = second;
- operands[2] = first;
- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc)))
- return false;
- }
- else
- {
- if (cond->data_type->base_type == HLSL_TYPE_FLOAT)
- {
- if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, zero);
+ if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, float_cond);
- operands[0] = zero;
- operands[1] = cond;
- type = cond->data_type;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
- if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, cond);
- }
+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg);
- memset(operands, 0, sizeof(operands));
- operands[0] = cond;
- operands[1] = first;
- operands[2] = second;
- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc)))
- return false;
- }
+ memset(operands, 0, sizeof(operands));
+ operands[0] = neg;
+ operands[1] = second;
+ operands[2] = first;
+ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc)))
+ return false;
hlsl_block_add_instr(block, replacement);
return true;
@@ -3319,11 +3295,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs,
struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false)
{
+ struct hlsl_type *cond_type = condition->data_type;
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
struct hlsl_ir_node *cond;
assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type));
+ if (cond_type->base_type != HLSL_TYPE_BOOL)
+ {
+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy);
+
+ if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc)))
+ return NULL;
+ hlsl_block_add_instr(instrs, condition);
+ }
+
operands[0] = condition;
operands[1] = if_true;
operands[2] = if_false;
@@ -3760,6 +3746,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
break;
+ case HLSL_IR_STATEBLOCK_CONSTANT:
+ /* Stateblock constants should not appear in the shader program. */
+ vkd3d_unreachable();
}
return false;
@@ -3848,6 +3837,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx)
}
}
+static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read)
+{
+ unsigned int i;
+
+ if (hlsl_deref_is_lowered(deref))
+ {
+ if (deref->rel_offset.node)
+ deref->rel_offset.node->last_read = last_read;
+ }
+ else
+ {
+ for (i = 0; i < deref->path_len; ++i)
+ deref->path[i].node->last_read = last_read;
+ }
+}
+
/* Compute the earliest and latest liveness for each variable. In the case that
* a variable is accessed inside of a loop, we promote its liveness to extend
* to at least the range of the entire loop. We also do this for nodes, so that
@@ -3867,6 +3872,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
case HLSL_IR_CALL:
/* We should have inlined all calls before computing liveness. */
vkd3d_unreachable();
+ case HLSL_IR_STATEBLOCK_CONSTANT:
+ /* Stateblock constants should not appear in the shader program. */
+ vkd3d_unreachable();
case HLSL_IR_STORE:
{
@@ -3876,8 +3884,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
if (!var->first_write)
var->first_write = loop_first ? min(instr->index, loop_first) : instr->index;
store->rhs.node->last_read = last_read;
- if (store->lhs.rel_offset.node)
- store->lhs.rel_offset.node->last_read = last_read;
+ deref_mark_last_read(&store->lhs, last_read);
break;
}
case HLSL_IR_EXPR:
@@ -3904,8 +3911,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
var = load->src.var;
var->last_read = max(var->last_read, last_read);
- if (load->src.rel_offset.node)
- load->src.rel_offset.node->last_read = last_read;
+ deref_mark_last_read(&load->src, last_read);
break;
}
case HLSL_IR_LOOP:
@@ -3922,14 +3928,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
var = load->resource.var;
var->last_read = max(var->last_read, last_read);
- if (load->resource.rel_offset.node)
- load->resource.rel_offset.node->last_read = last_read;
+ deref_mark_last_read(&load->resource, last_read);
if ((var = load->sampler.var))
{
var->last_read = max(var->last_read, last_read);
- if (load->sampler.rel_offset.node)
- load->sampler.rel_offset.node->last_read = last_read;
+ deref_mark_last_read(&load->sampler, last_read);
}
if (load->coords.node)
@@ -3954,8 +3958,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
var = store->resource.var;
var->last_read = max(var->last_read, last_read);
- if (store->resource.rel_offset.node)
- store->resource.rel_offset.node->last_read = last_read;
+ deref_mark_last_read(&store->resource, last_read);
store->coords.node->last_read = last_read;
store->value.node->last_read = last_read;
break;
@@ -4790,7 +4793,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx)
continue;
if (var1->reg_reservation.offset_type
- || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type))
+ || var1->reg_reservation.reg_type == 's'
+ || var1->reg_reservation.reg_type == 't'
+ || var1->reg_reservation.reg_type == 'u')
buffer->manually_packed_elements = true;
else
buffer->automatically_packed_elements = true;
@@ -5400,11 +5405,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
lower_ir(ctx, lower_narrowing_casts, body);
- lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_int_dot, body);
lower_ir(ctx, lower_int_division, body);
lower_ir(ctx, lower_int_modulus, body);
lower_ir(ctx, lower_int_abs, body);
+ lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_float_modulus, body);
hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL);
do
@@ -5420,13 +5425,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
remove_unreachable_code(ctx, body);
hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL);
- if (profile-> major_version < 4)
- {
- lower_ir(ctx, lower_nonfloat_exprs, body);
- /* Constants casted to float must be folded. */
- hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
- }
-
lower_ir(ctx, lower_nonconstant_vector_derefs, body);
lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_int_dot, body);
@@ -5438,9 +5436,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
hlsl_transform_ir(ctx, track_object_components_usage, body, NULL);
sort_synthetic_separated_samplers_first(ctx);
- lower_ir(ctx, lower_ternary, body);
if (profile->major_version < 4)
{
+ lower_ir(ctx, lower_ternary, body);
+
+ lower_ir(ctx, lower_nonfloat_exprs, body);
+        /* Constants cast to float must be folded, and new casts to bool also need to be lowered. */
+ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
+ lower_ir(ctx, lower_casts_to_bool, body);
+
lower_ir(ctx, lower_casts_to_int, body);
lower_ir(ctx, lower_division, body);
lower_ir(ctx, lower_sqrt, body);
@@ -5463,6 +5467,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_ir(ctx, validate_nonconstant_vector_store_derefs, body);
+ do
+ compute_liveness(ctx, entry_func);
+ while (hlsl_transform_ir(ctx, dce, body, NULL));
+
/* TODO: move forward, remove when no longer needed */
transform_derefs(ctx, replace_deref_path_with_offset, body);
while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL));
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
index b76b1fce507..4cea98e9286 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
@@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
assert(dst_type->base_type == src2->node.data_type->base_type);
assert(dst_type->base_type == src3->node.data_type->base_type);
+ assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL);
for (k = 0; k < dst_type->dimx; ++k)
- {
- switch (src1->node.data_type->base_type)
- {
- case HLSL_TYPE_FLOAT:
- case HLSL_TYPE_HALF:
- dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k];
- break;
-
- case HLSL_TYPE_DOUBLE:
- dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k];
- break;
+ dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k];
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- case HLSL_TYPE_BOOL:
- dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k];
- break;
-
- default:
- vkd3d_unreachable();
- }
- }
return true;
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 8af537390f9..610d907d981 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -17,6 +17,7 @@
*/
#include "vkd3d_shader_private.h"
+#include "vkd3d_types.h"
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve)
{
@@ -56,19 +57,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i
vsir_instruction_init(ins, &location, VKD3DSIH_NOP);
}
-static void remove_dcl_temps(struct vsir_program *program)
-{
- unsigned int i;
-
- for (i = 0; i < program->instructions.count; ++i)
- {
- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
-
- if (ins->handler_idx == VKD3DSIH_DCL_TEMPS)
- vkd3d_shader_instruction_make_nop(ins);
- }
-}
-
static bool vsir_instruction_init_with_params(struct vsir_program *program,
struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count)
@@ -94,85 +82,163 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program,
return true;
}
-static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program)
+static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program,
+ struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx)
{
+ const unsigned int components_read = 3 + (program->shader_version.major >= 2);
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
- struct vkd3d_shader_instruction *texkill_ins, *ins;
- unsigned int components_read = 3 + (program->shader_version.major >= 2);
- unsigned int tmp_idx = ~0u;
- unsigned int i, k;
-
- for (i = 0; i < instructions->count; ++i)
- {
- texkill_ins = &instructions->elements[i];
+ size_t pos = texkill - instructions->elements;
+ struct vkd3d_shader_instruction *ins;
+ unsigned int j;
- if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL)
- continue;
+ if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (*tmp_idx == ~0u)
+ *tmp_idx = program->temp_count++;
- if (tmp_idx == ~0u)
- tmp_idx = program->temp_count++;
+ /* tmp = ins->dst[0] < 0 */
- /* tmp = ins->dst[0] < 0 */
+ ins = &instructions->elements[pos + 1];
+ if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- ins = &instructions->elements[i + 1];
- if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2))
+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->dst[0].reg.idx[0].offset = *tmp_idx;
+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
+
+ ins->src[0].reg = texkill->dst[0].reg;
+ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE;
+ vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[1].reg.u.immconst_f32[0] = 0.0f;
+ ins->src[1].reg.u.immconst_f32[1] = 0.0f;
+ ins->src[1].reg.u.immconst_f32[2] = 0.0f;
+ ins->src[1].reg.u.immconst_f32[3] = 0.0f;
+
+ /* tmp.x = tmp.x || tmp.y */
+ /* tmp.x = tmp.x || tmp.z */
+ /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */
+
+ for (j = 1; j < components_read; ++j)
+ {
+ ins = &instructions->elements[pos + 1 + j];
+ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2)))
return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
- ins->dst[0].reg.idx[0].offset = tmp_idx;
- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
+ ins->dst[0].reg.idx[0].offset = *tmp_idx;
+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0;
- ins->src[0].reg = texkill_ins->dst[0].reg;
- vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[0].reg.idx[0].offset = *tmp_idx;
+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+ vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
- ins->src[1].reg.u.immconst_f32[0] = 0.0f;
- ins->src[1].reg.u.immconst_f32[1] = 0.0f;
- ins->src[1].reg.u.immconst_f32[2] = 0.0f;
- ins->src[1].reg.u.immconst_f32[3] = 0.0f;
+ ins->src[1].reg.idx[0].offset = *tmp_idx;
+ ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j);
+ }
- /* tmp.x = tmp.x || tmp.y */
- /* tmp.x = tmp.x || tmp.z */
- /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */
+ /* discard_nz tmp.x */
- for (k = 1; k < components_read; ++k)
- {
- ins = &instructions->elements[i + 1 + k];
- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2)))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ ins = &instructions->elements[pos + 1 + components_read];
+ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1)))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
- vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
- ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
- ins->dst[0].reg.idx[0].offset = tmp_idx;
- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0;
-
- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
- ins->src[0].reg.idx[0].offset = tmp_idx;
- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
- vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
- ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
- ins->src[1].reg.idx[0].offset = tmp_idx;
- ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k);
- }
+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->src[0].reg.idx[0].offset = *tmp_idx;
+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
- /* discard_nz tmp.x */
+ /* Make the original instruction no-op */
+ vkd3d_shader_instruction_make_nop(texkill);
- ins = &instructions->elements[i + 1 + components_read];
- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1)))
- return VKD3D_ERROR_OUT_OF_MEMORY;
- ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
+ return VKD3D_OK;
+}
- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
- ins->src[0].reg.idx[0].offset = tmp_idx;
- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+/* The Shader Model 5 Assembly documentation states: "If components of a mad
+ * instruction are tagged as precise, the hardware must execute a mad instruction
+ * or the exact equivalent, and it cannot split it into a multiply followed by an add."
+ * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is
+ * not fused for "precise" operations."
+ * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */
+static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program,
+ struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx)
+{
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ struct vkd3d_shader_instruction *mul_ins, *add_ins;
+ size_t pos = mad - instructions->elements;
+ struct vkd3d_shader_dst_param *mul_dst;
+
+ if (!(mad->flags & VKD3DSI_PRECISE_XYZW))
+ return VKD3D_OK;
+
+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ if (*tmp_idx == ~0u)
+ *tmp_idx = program->temp_count++;
+
+ mul_ins = &instructions->elements[pos];
+ add_ins = &instructions->elements[pos + 1];
+
+ mul_ins->handler_idx = VKD3DSIH_MUL;
+ mul_ins->src_count = 2;
+
+ if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2)))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW;
+
+ mul_dst = mul_ins->dst;
+ *add_ins->dst = *mul_dst;
+
+ mul_dst->modifiers = 0;
+ vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1);
+ mul_dst->reg.dimension = add_ins->dst->reg.dimension;
+ mul_dst->reg.idx[0].offset = *tmp_idx;
+
+ add_ins->src[0].reg = mul_dst->reg;
+ add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask);
+ add_ins->src[0].modifiers = 0;
+ add_ins->src[1] = mul_ins->src[2];
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program)
+{
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ unsigned int tmp_idx = ~0u, i;
+ enum vkd3d_result ret;
+
+ for (i = 0; i < instructions->count; ++i)
+ {
+ struct vkd3d_shader_instruction *ins = &instructions->elements[i];
+
+ switch (ins->handler_idx)
+ {
+ case VKD3DSIH_TEXKILL:
+ if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0)
+ return ret;
+ break;
- /* Make the original instruction no-op */
- vkd3d_shader_instruction_make_nop(texkill_ins);
+ case VKD3DSIH_MAD:
+ if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0)
+ return ret;
+ break;
+
+ case VKD3DSIH_DCL_TEMPS:
+ vkd3d_shader_instruction_make_nop(ins);
+ break;
+
+ default:
+ break;
+ }
}
return VKD3D_OK;
@@ -2577,97 +2643,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program)
}
}
- /* Second subpass: creating new blocks might have broken
- * references in PHI instructions, so we use the block map to fix
- * them. */
- current_label = 0;
- for (i = 0; i < ins_count; ++i)
- {
- struct vkd3d_shader_instruction *ins = &instructions[i];
- struct vkd3d_shader_src_param *new_src;
- unsigned int j, l, new_src_count = 0;
-
- switch (ins->handler_idx)
- {
- case VKD3DSIH_LABEL:
- current_label = label_from_src_param(&ins->src[0]);
- continue;
-
- case VKD3DSIH_PHI:
- break;
-
- default:
- continue;
- }
-
- /* First count how many source parameters we need. */
- for (j = 0; j < ins->src_count; j += 2)
- {
- unsigned int source_label = label_from_src_param(&ins->src[j + 1]);
- size_t k, match_count = 0;
-
- for (k = 0; k < map_count; ++k)
- {
- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k];
-
- if (mapping->switch_label == source_label && mapping->target_label == current_label)
- match_count += 1;
- }
-
- new_src_count += (match_count != 0) ? 2 * match_count : 2;
- }
-
- assert(new_src_count >= ins->src_count);
-
- /* Allocate more source parameters if needed. */
- if (new_src_count == ins->src_count)
- {
- new_src = ins->src;
- }
- else
- {
- if (!(new_src = vsir_program_get_src_params(program, new_src_count)))
- {
- ERR("Failed to allocate %u source parameters.\n", new_src_count);
- goto fail;
- }
- }
-
- /* Then do the copy. */
- for (j = 0, l = 0; j < ins->src_count; j += 2)
- {
- unsigned int source_label = label_from_src_param(&ins->src[j + 1]);
- size_t k, match_count = 0;
-
- for (k = 0; k < map_count; ++k)
- {
- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k];
-
- if (mapping->switch_label == source_label && mapping->target_label == current_label)
- {
- match_count += 1;
-
- new_src[l] = ins->src[j];
- new_src[l + 1] = ins->src[j + 1];
- new_src[l + 1].reg.idx[0].offset = mapping->if_label;
- l += 2;
- }
- }
-
- if (match_count == 0)
- {
- new_src[l] = ins->src[j];
- new_src[l + 1] = ins->src[j + 1];
- l += 2;
- }
- }
-
- assert(l == new_src_count);
-
- ins->src_count = new_src_count;
- ins->src = new_src;
- }
-
vkd3d_free(program->instructions.elements);
vkd3d_free(block_map);
program->instructions.elements = instructions;
@@ -2685,148 +2660,139 @@ fail:
return VKD3D_ERROR_OUT_OF_MEMORY;
}
-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program,
- struct vkd3d_shader_src_param *src);
+struct ssas_to_temps_alloc
+{
+ unsigned int *table;
+ unsigned int next_temp_idx;
+};
-/* This is idempotent: it can be safely applied more than once on the
- * same register. */
-static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg)
+static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count)
{
- unsigned int i;
+ size_t i = ssa_count * sizeof(*alloc->table);
- if (reg->type == VKD3DSPR_SSA)
+ if (!(alloc->table = vkd3d_malloc(i)))
{
- reg->type = VKD3DSPR_TEMP;
- reg->idx[0].offset += program->temp_count;
+ ERR("Failed to allocate SSA table.\n");
+ return false;
}
+ memset(alloc->table, 0xff, i);
- for (i = 0; i < reg->idx_count; ++i)
- if (reg->idx[i].rel_addr)
- materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr);
-}
-
-static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program,
- struct vkd3d_shader_dst_param *dst)
-{
- materialize_ssas_to_temps_process_reg(program, &dst->reg);
-}
-
-static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program,
- struct vkd3d_shader_src_param *src)
-{
- materialize_ssas_to_temps_process_reg(program, &src->reg);
+ alloc->next_temp_idx = temp_count;
+ return true;
}
-static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins,
- unsigned int label)
+/* This is idempotent: it can be safely applied more than once on the
+ * same register. */
+static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc,
+ struct vkd3d_shader_register *reg)
{
unsigned int i;
- assert(ins->handler_idx == VKD3DSIH_PHI);
-
- for (i = 0; i < ins->src_count; i += 2)
+ if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX)
{
- if (label_from_src_param(&ins->src[i + 1]) == label)
- return &ins->src[i];
+ reg->type = VKD3DSPR_TEMP;
+ reg->idx[0].offset = alloc->table[reg->idx[0].offset];
}
- vkd3d_unreachable();
+ for (i = 0; i < reg->idx_count; ++i)
+ if (reg->idx[i].rel_addr)
+ materialize_ssas_to_temps_process_reg(program, alloc, &reg->idx[i].rel_addr->reg);
}
-static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program,
- struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc,
- const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond,
- const struct vkd3d_shader_src_param *source, bool invert)
+struct ssas_to_temps_block_info
{
- struct vkd3d_shader_src_param *src;
- struct vkd3d_shader_dst_param *dst;
-
- if (!vsir_instruction_init_with_params(program, instruction, loc,
- cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1))
- return false;
-
- dst = instruction->dst;
- src = instruction->src;
-
- dst[0] = *dest;
- materialize_ssas_to_temps_process_dst_param(program, &dst[0]);
+ struct phi_incoming_to_temp
+ {
+ struct vkd3d_shader_src_param *src;
+ struct vkd3d_shader_dst_param *dst;
+ } *incomings;
+ size_t incoming_capacity;
+ size_t incoming_count;
+};
- assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0);
- assert(dst[0].modifiers == 0);
- assert(dst[0].shift == 0);
+static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info,
+ size_t count)
+{
+ size_t i;
- if (cond)
- {
- src[0] = *cond;
- src[1 + invert] = *source;
- memset(&src[2 - invert], 0, sizeof(src[2 - invert]));
- src[2 - invert].reg = dst[0].reg;
- materialize_ssas_to_temps_process_src_param(program, &src[1]);
- materialize_ssas_to_temps_process_src_param(program, &src[2]);
- }
- else
- {
- src[0] = *source;
- materialize_ssas_to_temps_process_src_param(program, &src[0]);
- }
+ for (i = 0; i < count; ++i)
+ vkd3d_free(block_info[i].incomings);
- return true;
+ vkd3d_free(block_info);
}
-static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program)
+static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program)
{
+ size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i;
+ struct ssas_to_temps_block_info *info, *block_info = NULL;
struct vkd3d_shader_instruction *instructions = NULL;
- struct materialize_ssas_to_temps_block_data
- {
- size_t phi_begin;
- size_t phi_count;
- } *block_index = NULL;
- size_t ins_capacity = 0, ins_count = 0, i;
+ struct ssas_to_temps_alloc alloc = {0};
unsigned int current_label = 0;
- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count))
- goto fail;
-
- if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index))))
+ if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
{
- ERR("Failed to allocate block index.\n");
+ ERR("Failed to allocate block info array.\n");
goto fail;
}
- for (i = 0; i < program->instructions.count; ++i)
+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
+ goto fail;
+
+ for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+ unsigned int j, temp_idx;
- switch (ins->handler_idx)
+ /* Only phi src/dst SSA values need be converted here. Structurisation may
+ * introduce new cases of undominated SSA use, which will be handled later. */
+ if (ins->handler_idx != VKD3DSIH_PHI)
+ continue;
+ ++phi_count;
+
+ temp_idx = alloc.next_temp_idx++;
+
+ for (j = 0; j < ins->src_count; j += 2)
{
- case VKD3DSIH_LABEL:
- current_label = label_from_src_param(&ins->src[0]);
- break;
+ struct phi_incoming_to_temp *incoming;
+ unsigned int label;
- case VKD3DSIH_PHI:
- assert(current_label != 0);
- assert(i != 0);
- if (block_index[current_label - 1].phi_begin == 0)
- block_index[current_label - 1].phi_begin = i;
- block_index[current_label - 1].phi_count += 1;
- break;
+ label = label_from_src_param(&ins->src[j + 1]);
+ assert(label);
- default:
- current_label = 0;
- break;
+ info = &block_info[label - 1];
+
+ if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1,
+ sizeof(*info->incomings))))
+ goto fail;
+
+ incoming = &info->incomings[info->incoming_count++];
+ incoming->src = &ins->src[j];
+ incoming->dst = ins->dst;
+
+ alloc.table[ins->dst->reg.idx[0].offset] = temp_idx;
+
+ ++incoming_count;
}
+
+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg);
}
+ if (!phi_count)
+ goto done;
+
+ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count))
+ goto fail;
+
for (i = 0; i < program->instructions.count; ++i)
{
- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+ struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i];
size_t j;
for (j = 0; j < ins->dst_count; ++j)
- materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]);
+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
for (j = 0; j < ins->src_count; ++j)
- materialize_ssas_to_temps_process_src_param(program, &ins->src[j]);
+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
switch (ins->handler_idx)
{
@@ -2835,65 +2801,21 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
break;
case VKD3DSIH_BRANCH:
- {
- if (vsir_register_is_label(&ins->src[0].reg))
- {
- const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1];
-
- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count))
- goto fail;
-
- for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j)
- {
- const struct vkd3d_shader_src_param *source;
-
- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j],
- current_label);
- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count],
- &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false))
- goto fail;
+ case VKD3DSIH_SWITCH_MONOLITHIC:
+ info = &block_info[current_label - 1];
- ++ins_count;
- }
- }
- else
+ for (j = 0; j < info->incoming_count; ++j)
{
- struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1],
- *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1];
- const struct vkd3d_shader_src_param *cond = &ins->src[0];
+ struct phi_incoming_to_temp *incoming = &info->incomings[j];
- if (!reserve_instructions(&instructions, &ins_capacity,
- ins_count + data_true->phi_count + data_false->phi_count))
+ mov_ins = &instructions[ins_count++];
+ if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0))
goto fail;
-
- for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j)
- {
- const struct vkd3d_shader_src_param *source;
-
- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j],
- current_label);
- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count],
- &ins->location, &program->instructions.elements[j].dst[0], cond, source, false))
- goto fail;
-
- ++ins_count;
- }
-
- for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j)
- {
- const struct vkd3d_shader_src_param *source;
-
- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j],
- current_label);
- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count],
- &ins->location, &program->instructions.elements[j].dst[0], cond, source, true))
- goto fail;
-
- ++ins_count;
- }
+ *mov_ins->dst = *incoming->dst;
+ mov_ins->src = incoming->src;
+ mov_ins->src_count = 1;
}
break;
- }
case VKD3DSIH_PHI:
continue;
@@ -2902,25 +2824,24 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
break;
}
- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
- goto fail;
-
instructions[ins_count++] = *ins;
}
vkd3d_free(program->instructions.elements);
- vkd3d_free(block_index);
program->instructions.elements = instructions;
program->instructions.capacity = ins_capacity;
program->instructions.count = ins_count;
- program->temp_count += program->ssa_count;
- program->ssa_count = 0;
+ program->temp_count = alloc.next_temp_idx;
+done:
+ ssas_to_temps_block_info_cleanup(block_info, program->block_count);
+ vkd3d_free(alloc.table);
return VKD3D_OK;
fail:
vkd3d_free(instructions);
- vkd3d_free(block_index);
+ ssas_to_temps_block_info_cleanup(block_info, program->block_count);
+ vkd3d_free(alloc.table);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
@@ -3061,19 +2982,19 @@ struct vsir_cfg_structure
union
{
struct vsir_block *block;
- struct
+ struct vsir_cfg_structure_loop
{
struct vsir_cfg_structure_list body;
unsigned idx;
} loop;
- struct
+ struct vsir_cfg_structure_selection
{
struct vkd3d_shader_src_param *condition;
struct vsir_cfg_structure_list if_body;
struct vsir_cfg_structure_list else_body;
bool invert_condition;
} selection;
- struct
+ struct vsir_cfg_structure_jump
{
enum vsir_cfg_jump_type
{
@@ -3157,6 +3078,14 @@ static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure)
}
}
+struct vsir_cfg_emit_target
+{
+ struct vkd3d_shader_instruction *instructions;
+ size_t ins_capacity, ins_count;
+ unsigned int jump_target_temp_idx;
+ unsigned int temp_count;
+};
+
struct vsir_cfg
{
struct vkd3d_shader_message_context *message_context;
@@ -3200,15 +3129,15 @@ struct vsir_cfg
* block), but we still try to keep `begin' as forward as
* possible, to keep the loop scope as small as possible. */
bool synthetic;
+ /* The number of jump instructions (both conditional and
+ * unconditional) that target this loop. */
+ unsigned int target_count;
} *loop_intervals;
size_t loop_interval_count, loop_interval_capacity;
struct vsir_cfg_structure_list structured_program;
- struct vkd3d_shader_instruction *instructions;
- size_t ins_capacity, ins_count;
- unsigned int jump_target_temp_idx;
- unsigned int temp_count;
+ struct vsir_cfg_emit_target *target;
};
static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
@@ -3248,6 +3177,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign
interval->begin = begin;
interval->end = end;
interval->synthetic = synthetic;
+ interval->target_count = 0;
return VKD3D_OK;
}
@@ -3402,7 +3332,7 @@ static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg)
}
static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program,
- struct vkd3d_shader_message_context *message_context)
+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target)
{
struct vsir_block *current_block = NULL;
enum vkd3d_result ret;
@@ -3412,6 +3342,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program
cfg->message_context = message_context;
cfg->program = program;
cfg->block_count = program->block_count;
+ cfg->target = target;
vsir_block_list_init(&cfg->order);
@@ -4250,53 +4181,157 @@ fail:
return VKD3D_ERROR_OUT_OF_MEMORY;
}
-static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target)
+static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_list *list, unsigned int target)
{
struct vsir_cfg_structure *last = &list->structures[list->count - 1];
if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE
&& !last->u.jump.condition && last->u.jump.target == target)
+ {
--list->count;
+ assert(cfg->loop_intervals[target].target_count > 0);
+ --cfg->loop_intervals[target].target_count;
+ }
}
-static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structure_list *list)
+static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list)
{
- enum vkd3d_result ret;
- size_t i;
+ struct vsir_cfg_structure *structure;
+ size_t count = list->count;
- for (i = 0; i < list->count; ++i)
- {
- struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump;
+ if (count == 0)
+ return NULL;
- if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition)
- continue;
+ structure = &list->structures[count - 1];
- vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION);
- new_selection.u.selection.condition = structure->u.jump.condition;
- new_selection.u.selection.invert_condition = structure->u.jump.invert_condition;
+ if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK
+ || structure->u.jump.condition)
+ return NULL;
- if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body,
- STRUCTURE_TYPE_JUMP)))
- return VKD3D_ERROR_OUT_OF_MEMORY;
- new_jump->u.jump.type = structure->u.jump.type;
- new_jump->u.jump.target = structure->u.jump.target;
+ return structure;
+}
- /* Move the rest of the structure list in the else branch
- * rather than leaving it after the selection construct. The
- * reason is that this is more conducive to further
- * optimization, because all the conditional `break's appear
- * as the last instruction of a branch of a cascade of
- * selection constructs at the end of the structure list we're
- * processing, instead of being buried in the middle of the
- * structure list itself. */
- if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body,
- &list->structures[i + 1], list->count - i - 1)) < 0)
- return ret;
+/* When the last instruction in both branches of a selection construct
+ * is an unconditional break, any of them can be moved after the
+ * selection construct. If they break the same loop both of them can
+ * be moved out, otherwise we can choose which one: we choose the one
+ * that breaks the innermost loop, because we hope to eventually
+ * remove the loop itself.
+ *
+ * In principle a similar movement could be done when the last
+ * instructions are continue and continue, or continue and break. But
+ * in practice I don't think those situations can happen given the
+ * previous passes we do on the program, so we don't care. */
+static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_list *list)
+{
+ struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break;
+ unsigned int if_target, else_target, max_target;
+ size_t pos = list->count - 1;
+
+ selection = &list->structures[pos];
+ assert(selection->type == STRUCTURE_TYPE_SELECTION);
+
+ if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body);
+ else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body);
+
+ if (!if_break || !else_break)
+ return VKD3D_OK;
+
+ if_target = if_break->u.jump.target;
+ else_target = else_break->u.jump.target;
+ max_target = max(if_target, else_target);
+
+ if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP)))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ new_break->u.jump.type = JUMP_BREAK;
+ new_break->u.jump.target = max_target;
+ ++cfg->loop_intervals[max_target].target_count;
+
+ /* Pointer `selection' could have been invalidated by the append
+ * operation. */
+ selection = &list->structures[pos];
+ assert(selection->type == STRUCTURE_TYPE_SELECTION);
+
+ if (if_target == max_target)
+ {
+ --selection->u.selection.if_body.count;
+ assert(cfg->loop_intervals[if_target].target_count > 0);
+ --cfg->loop_intervals[if_target].target_count;
+ }
+
+ if (else_target == max_target)
+ {
+ --selection->u.selection.else_body.count;
+ assert(cfg->loop_intervals[else_target].target_count > 0);
+ --cfg->loop_intervals[else_target].target_count;
+ }
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_list *list)
+{
+ struct vsir_cfg_structure *trailing;
+
+ if (list->count == 0)
+ return VKD3D_OK;
+
+ trailing = &list->structures[list->count - 1];
+
+ if (trailing->type != STRUCTURE_TYPE_SELECTION)
+ return VKD3D_OK;
+
+ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body);
+ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body);
+
+ return vsir_cfg_move_breaks_out_of_selections(cfg, list);
+}
+
+static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_list *list)
+{
+ enum vkd3d_result ret;
+ size_t i;
+
+ for (i = 0; i < list->count; ++i)
+ {
+ struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump;
+
+ if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition)
+ continue;
+
+ vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION);
+ new_selection.u.selection.condition = structure->u.jump.condition;
+ new_selection.u.selection.invert_condition = structure->u.jump.invert_condition;
+
+ if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body,
+ STRUCTURE_TYPE_JUMP)))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ new_jump->u.jump.type = structure->u.jump.type;
+ new_jump->u.jump.target = structure->u.jump.target;
+
+ /* Move the rest of the structure list in the else branch
+ * rather than leaving it after the selection construct. The
+ * reason is that this is more conducive to further
+ * optimization, because all the conditional `break's appear
+ * as the last instruction of a branch of a cascade of
+ * selection constructs at the end of the structure list we're
+ * processing, instead of being buried in the middle of the
+ * structure list itself. */
+ if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body,
+ &list->structures[i + 1], list->count - i - 1)) < 0)
+ return ret;
*structure = new_selection;
list->count = i + 1;
- if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0)
+ if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0)
+ return ret;
+
+ if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0)
return ret;
break;
@@ -4305,40 +4340,164 @@ static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structur
return VKD3D_OK;
}
+static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop)
+{
+ struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body;
+ unsigned int target, loop_idx = loop->u.loop.idx;
+ struct vsir_cfg_structure *trailing_break;
+ enum vkd3d_result ret;
+
+ trailing_break = vsir_cfg_get_trailing_break(loop_body);
+
+ /* If the loop's last instruction is not a break, we cannot remove
+ * the loop itself. */
+ if (!trailing_break)
+ {
+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
+ return ret;
+ memset(loop, 0, sizeof(*loop));
+ return VKD3D_OK;
+ }
+
+ target = trailing_break->u.jump.target;
+ assert(cfg->loop_intervals[target].target_count > 0);
+
+ /* If the loop is not targeted by any jump, we can remove it. The
+ * trailing `break' then targets another loop, so we have to keep
+ * it. */
+ if (cfg->loop_intervals[loop_idx].target_count == 0)
+ {
+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list,
+ &loop_body->structures[0], loop_body->count)) < 0)
+ return ret;
+ loop_body->count = 0;
+ return VKD3D_OK;
+ }
+
+ /* If the loop is targeted only by its own trailing `break'
+ * instruction, then we can remove it together with the `break'
+ * itself. */
+ if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1)
+ {
+ --cfg->loop_intervals[loop_idx].target_count;
+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list,
+ &loop_body->structures[0], loop_body->count - 1)) < 0)
+ return ret;
+ loop_body->count = 0;
+ return VKD3D_OK;
+ }
+
+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
+ return ret;
+ memset(loop, 0, sizeof(*loop));
+
+ return VKD3D_OK;
+}
+
static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
{
+ struct vsir_cfg_structure_list old_list = *list, *new_list = list;
enum vkd3d_result ret;
size_t i;
- for (i = 0; i < list->count; ++i)
+ memset(new_list, 0, sizeof(*new_list));
+
+ for (i = 0; i < old_list.count; ++i)
{
- struct vsir_cfg_structure *loop = &list->structures[i];
+ struct vsir_cfg_structure *loop = &old_list.structures[i], *selection;
struct vsir_cfg_structure_list *loop_body;
if (loop->type != STRUCTURE_TYPE_LOOP)
+ {
+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
+ goto out;
+ memset(loop, 0, sizeof(*loop));
continue;
+ }
loop_body = &loop->u.loop.body;
if (loop_body->count == 0)
+ {
+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
+ goto out;
+ memset(loop, 0, sizeof(*loop));
continue;
+ }
- vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx);
+ vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx);
if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0)
- return ret;
+ goto out;
- if ((ret = vsir_cfg_synthesize_selections(loop_body)) < 0)
- return ret;
+ if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0)
+ goto out;
+
+ if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0)
+ goto out;
+
+ /* If the last pushed instruction is a selection and one of the branches terminates with a
+ * `break', start pushing to the other branch, in the hope of eventually push a `break'
+ * there too and be able to remove a loop. */
+ if (new_list->count == 0)
+ continue;
+
+ selection = &new_list->structures[new_list->count - 1];
+
+ if (selection->type == STRUCTURE_TYPE_SELECTION)
+ {
+ if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body))
+ new_list = &selection->u.selection.else_body;
+ else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body))
+ new_list = &selection->u.selection.if_body;
+ }
}
- return VKD3D_OK;
+ ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list);
+
+out:
+ vsir_cfg_structure_list_cleanup(&old_list);
+
+ return ret;
+}
+
+static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
+{
+ size_t i;
+
+ for (i = 0; i < list->count; ++i)
+ {
+ struct vsir_cfg_structure *structure = &list->structures[i];
+
+ switch (structure->type)
+ {
+ case STRUCTURE_TYPE_BLOCK:
+ break;
+
+ case STRUCTURE_TYPE_LOOP:
+ vsir_cfg_count_targets(cfg, &structure->u.loop.body);
+ break;
+
+ case STRUCTURE_TYPE_SELECTION:
+ vsir_cfg_count_targets(cfg, &structure->u.selection.if_body);
+ vsir_cfg_count_targets(cfg, &structure->u.selection.else_body);
+ break;
+
+ case STRUCTURE_TYPE_JUMP:
+ if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE)
+ ++cfg->loop_intervals[structure->u.jump.target].target_count;
+ break;
+ }
+ }
}
static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
{
enum vkd3d_result ret;
+ vsir_cfg_count_targets(cfg, &cfg->structured_program);
+
ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
if (TRACE_ON())
@@ -4348,199 +4507,244 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
}
static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg,
- struct vsir_cfg_structure_list *list, unsigned int loop_idx)
+ struct vsir_cfg_structure_list *list, unsigned int loop_idx);
+
+static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg,
+ struct vsir_block *block)
+{
+ struct vsir_cfg_emit_target *target = cfg->target;
+
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity,
+ target->ins_count + (block->end - block->begin)))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ memcpy(&target->instructions[target->ins_count], block->begin,
+ (char *)block->end - (char *)block->begin);
+
+ target->ins_count += block->end - block->begin;
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_loop *loop, unsigned int loop_idx)
{
+ struct vsir_cfg_emit_target *target = cfg->target;
const struct vkd3d_shader_location no_loc = {0};
enum vkd3d_result ret;
- size_t i;
- for (i = 0; i < list->count; ++i)
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP);
+
+ if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0)
+ return ret;
+
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP);
+
+ /* Add a trampoline to implement multilevel jumping depending on the stored
+ * jump_target value. */
+ if (loop_idx != UINT_MAX)
{
- struct vsir_cfg_structure *structure = &list->structures[i];
+ /* If the multilevel jump is a `continue' and the target is the loop we're inside
+ * right now, then we can finally do the `continue'. */
+ const unsigned int outer_continue_target = loop_idx << 1 | 1;
+ /* If the multilevel jump is a `continue' to any other target, or if it is a `break'
+ * and the target is not the loop we just finished emitting, then it means that
+ * we have to reach an outer loop, so we keep breaking. */
+ const unsigned int inner_break_target = loop->idx << 1;
+
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, VKD3DSIH_IEQ, 1, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- switch (structure->type)
- {
- case STRUCTURE_TYPE_BLOCK:
- {
- struct vsir_block *block = structure->u.block;
+ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count);
+ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx);
+ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target);
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin)))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ ++target->ins_count;
- memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin);
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, VKD3DSIH_CONTINUEP, 0, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- cfg->ins_count += block->end - block->begin;
- break;
- }
+ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count);
- case STRUCTURE_TYPE_LOOP:
- {
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ ++target->ins_count;
+ ++target->temp_count;
- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP);
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, VKD3DSIH_IEQ, 1, 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0)
- return ret;
+ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count);
+ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx);
+ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target);
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ ++target->ins_count;
- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP);
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, VKD3DSIH_BREAKP, 0, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
- /* Add a trampoline to implement multilevel jumping depending on the stored
- * jump_target value. */
- if (loop_idx != UINT_MAX)
- {
- /* If the multilevel jump is a `continue' and the target is the loop we're inside
- * right now, then we can finally do the `continue'. */
- const unsigned int outer_continue_target = loop_idx << 1 | 1;
- /* If the multilevel jump is a `continue' to any other target, or if it is a `break'
- * and the target is not the loop we just finished emitting, then it means that
- * we have to reach an outer loop, so we keep breaking. */
- const unsigned int inner_break_target = structure->u.loop.idx << 1;
+ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count);
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
- &no_loc, VKD3DSIH_IEQ, 1, 2))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ ++target->ins_count;
+ ++target->temp_count;
+ }
- dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count);
- src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx);
- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target);
+ return VKD3D_OK;
+}
- ++cfg->ins_count;
+static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_selection *selection, unsigned int loop_idx)
+{
+ struct vsir_cfg_emit_target *target = cfg->target;
+ const struct vkd3d_shader_location no_loc = {0};
+ enum vkd3d_result ret;
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
- &no_loc, VKD3DSIH_CONTINUEP, 0, 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count);
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, VKD3DSIH_IF, 0, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- ++cfg->ins_count;
- ++cfg->temp_count;
+ target->instructions[target->ins_count].src[0] = *selection->condition;
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
- &no_loc, VKD3DSIH_IEQ, 1, 2))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (selection->invert_condition)
+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
- dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count);
- src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx);
- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target);
+ ++target->ins_count;
- ++cfg->ins_count;
+ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0)
+ return ret;
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
- &no_loc, VKD3DSIH_BREAKP, 0, 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
+ if (selection->else_body.count != 0)
+ {
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count);
+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE);
- ++cfg->ins_count;
- ++cfg->temp_count;
- }
+ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0)
+ return ret;
+ }
- break;
- }
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- case STRUCTURE_TYPE_SELECTION:
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF);
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc,
- VKD3DSIH_IF, 0, 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ return VKD3D_OK;
+}
- cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition;
+static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_jump *jump, unsigned int loop_idx)
+{
+ struct vsir_cfg_emit_target *target = cfg->target;
+ const struct vkd3d_shader_location no_loc = {0};
+ /* Encode the jump target as the loop index plus a bit to remember whether
+ * we're breaking or continueing. */
+ unsigned int jump_target = jump->target << 1;
+ enum vkd3d_shader_opcode opcode;
- if (structure->u.selection.invert_condition)
- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
+ switch (jump->type)
+ {
+ case JUMP_CONTINUE:
+ /* If we're continueing the loop we're directly inside, then we can emit a
+ * `continue'. Otherwise we first have to break all the loops between here
+ * and the loop to continue, recording our intention to continue
+ * in the lowest bit of jump_target. */
+ if (jump->target == loop_idx)
+ {
+ opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE;
+ break;
+ }
+ jump_target |= 1;
+ /* fall through */
- ++cfg->ins_count;
+ case JUMP_BREAK:
+ opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK;
+ break;
- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0)
- return ret;
+ case JUMP_RET:
+ assert(!jump->condition);
+ opcode = VKD3DSIH_RET;
+ break;
- if (structure->u.selection.else_body.count != 0)
- {
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ default:
+ vkd3d_unreachable();
+ }
- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE);
+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0)
- return ret;
- }
+ if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP)
+ {
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, VKD3DSIH_MOV, 1, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx);
+ src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target);
- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF);
- break;
+ ++target->ins_count;
+ }
- case STRUCTURE_TYPE_JUMP:
- {
- /* Encode the jump target as the loop index plus a bit to remember whether
- * we're breaking or continueing. */
- unsigned int jump_target = structure->u.jump.target << 1;
- enum vkd3d_shader_opcode opcode;
+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
+ &no_loc, opcode, 0, !!jump->condition))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
- switch (structure->u.jump.type)
- {
- case JUMP_CONTINUE:
- /* If we're continueing the loop we're directly inside, then we can emit a
- * `continue'. Otherwise we first have to break all the loops between here
- * and the loop to continue, recording our intention to continue
- * in the lowest bit of jump_target. */
- if (structure->u.jump.target == loop_idx)
- {
- opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE;
- break;
- }
- jump_target |= 1;
- /* fall through */
-
- case JUMP_BREAK:
- opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK;
- break;
-
- case JUMP_RET:
- assert(!structure->u.jump.condition);
- opcode = VKD3DSIH_RET;
- break;
-
- default:
- vkd3d_unreachable();
- }
+ if (jump->invert_condition)
+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ if (jump->condition)
+ target->instructions[target->ins_count].src[0] = *jump->condition;
- if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP)
- {
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
- &no_loc, VKD3DSIH_MOV, 1, 1))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ ++target->ins_count;
- dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx);
- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target);
+ return VKD3D_OK;
+}
- ++cfg->ins_count;
- }
+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg,
+ struct vsir_cfg_structure_list *list, unsigned int loop_idx)
+{
+ enum vkd3d_result ret;
+ size_t i;
- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count],
- &no_loc, opcode, 0, !!structure->u.jump.condition))
- return VKD3D_ERROR_OUT_OF_MEMORY;
+ for (i = 0; i < list->count; ++i)
+ {
+ struct vsir_cfg_structure *structure = &list->structures[i];
- if (structure->u.jump.invert_condition)
- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
+ switch (structure->type)
+ {
+ case STRUCTURE_TYPE_BLOCK:
+ if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0)
+ return ret;
+ break;
- if (structure->u.jump.condition)
- cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition;
+ case STRUCTURE_TYPE_LOOP:
+ if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0)
+ return ret;
+ break;
- ++cfg->ins_count;
+ case STRUCTURE_TYPE_SELECTION:
+ if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection,
+ loop_idx)) < 0)
+ return ret;
+ break;
+
+ case STRUCTURE_TYPE_JUMP:
+ if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump,
+ loop_idx)) < 0)
+ return ret;
break;
- }
default:
vkd3d_unreachable();
@@ -4551,40 +4755,191 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg,
}
static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg)
+{
+ return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX);
+}
+
+static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program,
+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target)
{
enum vkd3d_result ret;
- size_t i;
+ struct vsir_cfg cfg;
+
+ if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0)
+ return ret;
- cfg->jump_target_temp_idx = cfg->program->temp_count;
- cfg->temp_count = cfg->program->temp_count + 1;
+ vsir_cfg_compute_dominators(&cfg);
- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count))
+ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0)
+ goto out;
+
+ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0)
+ goto out;
+
+ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0)
+ goto out;
+
+ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0)
+ goto out;
+
+ if ((ret = vsir_cfg_optimize(&cfg)) < 0)
+ goto out;
+
+ ret = vsir_cfg_emit_structured_program(&cfg);
+
+out:
+ vsir_cfg_cleanup(&cfg);
+
+ return ret;
+}
+
+static enum vkd3d_result vsir_program_structurize(struct vsir_program *program,
+ struct vkd3d_shader_message_context *message_context)
+{
+ struct vsir_cfg_emit_target target = {0};
+ enum vkd3d_result ret;
+ unsigned int i;
+
+ target.jump_target_temp_idx = program->temp_count;
+ target.temp_count = program->temp_count + 1;
+
+ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count))
return VKD3D_ERROR_OUT_OF_MEMORY;
/* Copy declarations until the first block. */
- for (i = 0; i < cfg->program->instructions.count; ++i)
+ for (i = 0; i < program->instructions.count; ++i)
{
- struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i];
+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
if (ins->handler_idx == VKD3DSIH_LABEL)
break;
- cfg->instructions[cfg->ins_count++] = *ins;
+ target.instructions[target.ins_count++] = *ins;
}
- if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0)
+ if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0)
goto fail;
- vkd3d_free(cfg->program->instructions.elements);
- cfg->program->instructions.elements = cfg->instructions;
- cfg->program->instructions.capacity = cfg->ins_capacity;
- cfg->program->instructions.count = cfg->ins_count;
- cfg->program->temp_count = cfg->temp_count;
+ vkd3d_free(program->instructions.elements);
+ program->instructions.elements = target.instructions;
+ program->instructions.capacity = target.ins_capacity;
+ program->instructions.count = target.ins_count;
+ program->temp_count = target.temp_count;
return VKD3D_OK;
fail:
- vkd3d_free(cfg->instructions);
+ vkd3d_free(target.instructions);
+
+ return ret;
+}
+
+static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc,
+ struct vsir_block *block, struct vsir_block **origin_blocks)
+{
+ unsigned int i;
+
+ if (!register_is_ssa(reg))
+ return;
+
+ i = reg->idx[0].offset;
+ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block))
+ alloc->table[i] = alloc->next_temp_idx++;
+
+ for (i = 0; i < reg->idx_count; ++i)
+ if (reg->idx[i].rel_addr)
+ register_map_undominated_use(&reg->idx[i].rel_addr->reg, alloc, block, origin_blocks);
+}
+
+/* Drivers are not necessarily optimised to handle very large numbers of temps. For example,
+ * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV.
+ * This can also result in the backend emitting less code because temps typically need an
+ * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all
+ * undominated SSA use, but structurisation may create new occurrences. */
+static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg)
+{
+ struct vsir_program *program = cfg->program;
+ struct ssas_to_temps_alloc alloc = {0};
+ struct vsir_block **origin_blocks;
+ unsigned int j;
+ size_t i;
+
+ if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks))))
+ {
+ ERR("Failed to allocate origin block array.\n");
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ }
+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
+ {
+ vkd3d_free(origin_blocks);
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ }
+
+ for (i = 0; i < cfg->block_count; ++i)
+ {
+ struct vsir_block *block = &cfg->blocks[i];
+ struct vkd3d_shader_instruction *ins;
+
+ for (ins = block->begin; ins <= block->end; ++ins)
+ {
+ for (j = 0; j < ins->dst_count; ++j)
+ {
+ if (register_is_ssa(&ins->dst[j].reg))
+ origin_blocks[ins->dst[j].reg.idx[0].offset] = block;
+ }
+ }
+ }
+
+ for (i = 0; i < cfg->block_count; ++i)
+ {
+ struct vsir_block *block = &cfg->blocks[i];
+ struct vkd3d_shader_instruction *ins;
+
+ for (ins = block->begin; ins <= block->end; ++ins)
+ {
+ for (j = 0; j < ins->src_count; ++j)
+ register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks);
+ }
+ }
+
+ if (alloc.next_temp_idx == program->temp_count)
+ goto done;
+
+ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count);
+
+ for (i = 0; i < program->instructions.count; ++i)
+ {
+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+ for (j = 0; j < ins->dst_count; ++j)
+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
+
+ for (j = 0; j < ins->src_count; ++j)
+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
+ }
+
+ program->temp_count = alloc.next_temp_idx;
+done:
+ vkd3d_free(origin_blocks);
+ vkd3d_free(alloc.table);
+
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program,
+ struct vkd3d_shader_message_context *message_context)
+{
+ enum vkd3d_result ret;
+ struct vsir_cfg cfg;
+
+ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0)
+ return ret;
+
+ vsir_cfg_compute_dominators(&cfg);
+
+ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg);
+
+ vsir_cfg_cleanup(&cfg);
return ret;
}
@@ -5459,63 +5814,25 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
{
enum vkd3d_result result = VKD3D_OK;
- remove_dcl_temps(program);
-
- if ((result = vsir_program_lower_texkills(program)) < 0)
+ if ((result = vsir_program_lower_instructions(program)) < 0)
return result;
if (program->shader_version.major >= 6)
{
- struct vsir_cfg cfg;
-
- if ((result = lower_switch_to_if_ladder(program)) < 0)
- return result;
-
- if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0)
- return result;
-
- if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
+ if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0)
return result;
- vsir_cfg_compute_dominators(&cfg);
-
- if ((result = vsir_cfg_compute_loops(&cfg)) < 0)
- {
- vsir_cfg_cleanup(&cfg);
+ if ((result = lower_switch_to_if_ladder(program)) < 0)
return result;
- }
- if ((result = vsir_cfg_sort_nodes(&cfg)) < 0)
- {
- vsir_cfg_cleanup(&cfg);
+ if ((result = vsir_program_structurize(program, message_context)) < 0)
return result;
- }
- if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0)
- {
- vsir_cfg_cleanup(&cfg);
+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
return result;
- }
- if ((result = vsir_cfg_build_structured_program(&cfg)) < 0)
- {
- vsir_cfg_cleanup(&cfg);
+ if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0)
return result;
- }
-
- if ((result = vsir_cfg_optimize(&cfg)) < 0)
- {
- vsir_cfg_cleanup(&cfg);
- return result;
- }
-
- if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0)
- {
- vsir_cfg_cleanup(&cfg);
- return result;
- }
-
- vsir_cfg_cleanup(&cfg);
}
else
{
@@ -5545,10 +5862,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
return result;
- }
- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
- return result;
+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
+ return result;
+ }
if (TRACE_ON())
vkd3d_shader_trace(program);
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index 1cae2d7d9d4..dfab1cb229b 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -4419,11 +4419,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp
{
unsigned int component_count = vsir_write_mask_component_count(dst->write_mask);
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
- uint32_t type_id, val_id;
+ uint32_t type_id, dst_type_id, val_id;
+ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
if (component_count > 1)
{
- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count);
val_id = vkd3d_spirv_build_op_composite_construct(builder,
type_id, component_ids, component_count);
}
@@ -4431,6 +4431,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp
{
val_id = *component_ids;
}
+
+ dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count);
+ if (dst_type_id != type_id)
+ val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id);
+
spirv_compiler_emit_store_dst(compiler, dst, val_id);
}
@@ -7283,8 +7288,12 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler,
}
general_implementation:
- write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type))
- ? vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask;
+ write_mask = dst->write_mask;
+ if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type))
+ write_mask = vsir_write_mask_64_from_32(write_mask);
+ else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type))
+ write_mask = vsir_write_mask_32_from_64(write_mask);
+
val_id = spirv_compiler_emit_load_src(compiler, src, write_mask);
if (dst->reg.data_type != src->reg.data_type)
{
@@ -8895,8 +8904,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler,
const struct vkd3d_shader_dst_param *dst = instruction->dst;
const struct vkd3d_shader_src_param *src = instruction->src;
uint32_t base_coordinate_id, component_idx;
- const struct vkd3d_shader_src_param *data;
struct vkd3d_shader_register_info reg_info;
+ struct vkd3d_shader_src_param data;
unsigned int component_count;
if (!spirv_compiler_get_register_info(compiler, &dst->reg, &reg_info))
@@ -8908,8 +8917,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler,
base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler,
type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
- data = &src[instruction->src_count - 1];
- val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask);
+ data = src[instruction->src_count - 1];
+ data.reg.data_type = VKD3D_DATA_UINT;
+ val_id = spirv_compiler_emit_load_src(compiler, &data, dst->write_mask);
component_count = vsir_write_mask_component_count(dst->write_mask);
for (component_idx = 0; component_idx < component_count; ++component_idx)
@@ -9334,6 +9344,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler,
const struct vkd3d_shader_dst_param *dst = instruction->dst;
const struct vkd3d_shader_src_param *src = instruction->src;
uint32_t type_id, lod_id, val_id, miplevel_count_id;
+ enum vkd3d_shader_component_type component_type;
uint32_t constituents[VKD3D_VEC4_SIZE];
unsigned int i, size_component_count;
struct vkd3d_shader_image image;
@@ -9370,10 +9381,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler,
val_id = vkd3d_spirv_build_op_composite_construct(builder,
type_id, constituents, i + 2);
+ component_type = VKD3D_SHADER_COMPONENT_FLOAT;
+
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE);
if (instruction->flags == VKD3DSI_RESINFO_UINT)
{
- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id);
+ /* SSA registers must match the specified result type. */
+ if (!register_is_ssa(&dst->reg))
+ val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id);
+ else
+ component_type = VKD3D_SHADER_COMPONENT_UINT;
}
else
{
@@ -9382,7 +9399,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler,
val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id);
}
val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL,
- VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask);
+ component_type, src[1].swizzle, dst->write_mask);
spirv_compiler_emit_store_dst(compiler, dst, val_id);
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index 4d0658313d5..d5019a5dd63 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -3385,10 +3385,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
if (profile->major_version >= 5)
{
- put_u32(&buffer, TAG_RD11);
+ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11);
put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */
put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */
- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */
+ put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t)); /* size of binding desc */
put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */
put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */
put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */
@@ -3405,6 +3405,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
const struct extern_resource *resource = &extern_resources[i];
uint32_t flags = 0;
+ if (hlsl_version_ge(ctx, 5, 1))
+ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection.");
+
if (resource->is_user_packed)
flags |= D3D_SIF_USERPACKED;
@@ -3437,6 +3440,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
if (!cbuffer->reg.allocated)
continue;
+ if (hlsl_version_ge(ctx, 5, 1))
+ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection.");
+
if (cbuffer->reservation.reg_type)
flags |= D3D_SIF_USERPACKED;
@@ -5343,7 +5349,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex
&expr->node, arg1, arg2);
break;
- case HLSL_OP3_MOVC:
+ case HLSL_OP3_TERNARY:
write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3);
break;
@@ -5399,7 +5405,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju
case HLSL_IR_JUMP_DISCARD_NZ:
{
- instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ;
+ instr.opcode = VKD3D_SM4_OP_DISCARD;
+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
instr.src_count = 1;
@@ -5700,19 +5707,13 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
{
if (instr->data_type)
{
- if (instr->data_type->class == HLSL_CLASS_MATRIX)
- {
- hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered.");
- break;
- }
- else if (instr->data_type->class == HLSL_CLASS_OBJECT)
+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
{
- hlsl_fixme(tpf->ctx, &instr->loc, "Object copy.");
+ hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.",
+ instr->data_type->class);
break;
}
- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR);
-
if (!instr->reg.allocated)
{
assert(instr->type == HLSL_IR_CONSTANT);
@@ -5808,13 +5809,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
{
if (cbuffer->reg.allocated)
+ {
+ if (hlsl_version_ge(ctx, 5, 1))
+ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition.");
+
write_sm4_dcl_constant_buffer(&tpf, cbuffer);
+ }
}
for (i = 0; i < extern_resources_count; ++i)
{
const struct extern_resource *resource = &extern_resources[i];
+ if (hlsl_version_ge(ctx, 5, 1))
+ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration.");
+
if (resource->regset == HLSL_REGSET_SAMPLERS)
write_sm4_dcl_samplers(&tpf, resource);
else if (resource->regset == HLSL_REGSET_TEXTURES)
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index 6d442cd517d..9b37bbef70b 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ
return compacted_swizzle;
}
+static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask)
+{
+ static const unsigned int swizzles[16] =
+ {
+ 0,
+ VKD3D_SHADER_SWIZZLE(X, X, X, X),
+ VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y),
+ VKD3D_SHADER_SWIZZLE(X, Y, X, X),
+ VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z),
+ VKD3D_SHADER_SWIZZLE(X, Z, X, X),
+ VKD3D_SHADER_SWIZZLE(Y, Z, X, X),
+ VKD3D_SHADER_SWIZZLE(X, Y, Z, X),
+ VKD3D_SHADER_SWIZZLE(W, W, W, W),
+ VKD3D_SHADER_SWIZZLE(X, W, X, X),
+ VKD3D_SHADER_SWIZZLE(Y, W, X, X),
+ VKD3D_SHADER_SWIZZLE(X, Y, W, X),
+ VKD3D_SHADER_SWIZZLE(Z, W, X, X),
+ VKD3D_SHADER_SWIZZLE(X, Z, W, X),
+ VKD3D_SHADER_SWIZZLE(Y, Z, W, X),
+ VKD3D_SHADER_SWIZZLE(X, Y, Z, W),
+ };
+
+ return swizzles[writemask & 0xf];
+}
+
struct vkd3d_struct
{
enum vkd3d_shader_structure_type type;
diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c
new file mode 100644
index 00000000000..56ba6990420
--- /dev/null
+++ b/libs/vkd3d/libs/vkd3d/cache.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2024 Stefan Dösinger for CodeWeavers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "vkd3d_private.h"
+
+struct vkd3d_shader_cache
+{
+ unsigned int refcount;
+};
+
+int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache)
+{
+ struct vkd3d_shader_cache *object;
+
+ TRACE("%p.\n", cache);
+
+ object = vkd3d_malloc(sizeof(*object));
+ if (!object)
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ object->refcount = 1;
+ *cache = object;
+
+ return VKD3D_OK;
+}
+
+unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache)
+{
+ unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount);
+ TRACE("cache %p refcount %u.\n", cache, refcount);
+ return refcount;
+}
+
+unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache)
+{
+ unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount);
+ TRACE("cache %p refcount %u.\n", cache, refcount);
+
+ if (refcount)
+ return refcount;
+
+ vkd3d_free(cache);
+ return 0;
+}
diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c
index 65db8b70bfd..90de27c53b6 100644
--- a/libs/vkd3d/libs/vkd3d/device.c
+++ b/libs/vkd3d/libs/vkd3d/device.c
@@ -2529,11 +2529,17 @@ struct d3d12_cache_session
ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface;
unsigned int refcount;
+ struct list cache_list_entry;
+
struct d3d12_device *device;
struct vkd3d_private_store private_store;
D3D12_SHADER_CACHE_SESSION_DESC desc;
+ struct vkd3d_shader_cache *cache;
};
+static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER;
+static struct list cache_list = LIST_INIT(cache_list);
+
static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface)
{
return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface);
@@ -2582,6 +2588,11 @@ static void d3d12_cache_session_destroy(struct d3d12_cache_session *session)
TRACE("Destroying cache session %p.\n", session);
+ vkd3d_mutex_lock(&cache_list_mutex);
+ list_remove(&session->cache_list_entry);
+ vkd3d_mutex_unlock(&cache_list_mutex);
+
+ vkd3d_shader_cache_decref(session->cache);
vkd3d_private_store_destroy(&session->private_store);
vkd3d_free(session);
@@ -2707,11 +2718,14 @@ static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl =
static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session,
struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc)
{
+ struct d3d12_cache_session *i;
+ enum vkd3d_result ret;
HRESULT hr;
session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl;
session->refcount = 1;
session->desc = *desc;
+ session->cache = NULL;
if (!session->desc.MaximumValueFileSizeBytes)
session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024;
@@ -2723,9 +2737,56 @@ static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session,
if (FAILED(hr = vkd3d_private_store_init(&session->private_store)))
return hr;
+ vkd3d_mutex_lock(&cache_list_mutex);
+
+ /* We expect the number of open caches to be small. */
+ LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry)
+ {
+ if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier)))
+ {
+ TRACE("Found an existing cache %p from session %p.\n", i->cache, i);
+ if (desc->Version == i->desc.Version)
+ {
+ session->desc = i->desc;
+ vkd3d_shader_cache_incref(session->cache = i->cache);
+ break;
+ }
+ else
+ {
+ WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n",
+ i->desc.Version, desc->Version);
+ hr = DXGI_ERROR_ALREADY_EXISTS;
+ goto error;
+ }
+ }
+ }
+
+ if (!session->cache)
+ {
+ if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK)
+ FIXME("Disk caches are not yet implemented.\n");
+
+ ret = vkd3d_shader_open_cache(&session->cache);
+ if (ret)
+ {
+ WARN("Failed to open shader cache.\n");
+ hr = hresult_from_vkd3d_result(ret);
+ goto error;
+ }
+ }
+
+ /* Add it to the list even if we reused an existing cache. The other session might be destroyed,
+ * but the cache stays alive and can be opened a third time. */
+ list_add_tail(&cache_list, &session->cache_list_entry);
d3d12_device_add_ref(session->device = device);
+ vkd3d_mutex_unlock(&cache_list_mutex);
return S_OK;
+
+error:
+ vkd3d_private_store_destroy(&session->private_store);
+ vkd3d_mutex_unlock(&cache_list_mutex);
+ return hr;
}
/* ID3D12Device */
@@ -4874,6 +4935,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Dev
WARN("No output pointer, returning S_FALSE.\n");
return S_FALSE;
}
+ *session = NULL;
if (!(object = vkd3d_malloc(sizeof(*object))))
return E_OUTOFMEMORY;
diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c
index b83a45d0606..179999148bc 100644
--- a/libs/vkd3d/libs/vkd3d/resource.c
+++ b/libs/vkd3d/libs/vkd3d/resource.c
@@ -1893,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3
WARN("Invalid sample count 0.\n");
return E_INVALIDARG;
}
+ if (desc->SampleDesc.Count > 1
+ && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)))
+ {
+ WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n",
+ desc->SampleDesc.Count);
+ return E_INVALIDARG;
+ }
if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0)))
{
@@ -1996,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
WARN("Invalid initial resource state %#x.\n", initial_state);
return E_INVALIDARG;
}
+ if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET))
+ {
+ WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state);
+ return E_INVALIDARG;
+ }
if (optimized_clear_value && d3d12_resource_is_buffer(resource))
{
diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
index 08cc110e8f7..b8328216a29 100644
--- a/libs/vkd3d/libs/vkd3d/state.c
+++ b/libs/vkd3d/libs/vkd3d/state.c
@@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState
d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device);
+ if (state->implicit_root_signature)
+ d3d12_root_signature_Release(state->implicit_root_signature);
+
vkd3d_free(state);
d3d12_device_release(device);
@@ -2413,8 +2416,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct vkd3d_shader_interface_info shader_interface;
struct vkd3d_shader_descriptor_offset_info offset_info;
- const struct d3d12_root_signature *root_signature;
struct vkd3d_shader_spirv_target_info target_info;
+ struct d3d12_root_signature *root_signature;
VkPipelineLayout vk_pipeline_layout;
HRESULT hr;
@@ -2425,13 +2428,27 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature)))
{
- WARN("Root signature is NULL.\n");
- return E_INVALIDARG;
+ TRACE("Root signature is NULL, looking for an embedded signature.\n");
+ if (FAILED(hr = d3d12_root_signature_create(device,
+ desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature)))
+ {
+ WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr));
+ return hr;
+ }
+ state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface;
+ }
+ else
+ {
+ state->implicit_root_signature = NULL;
}
if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature,
&desc->cs, VK_SHADER_STAGE_COMPUTE_BIT)))
+ {
+ if (state->implicit_root_signature)
+ d3d12_root_signature_Release(state->implicit_root_signature);
return hr;
+ }
memset(&target_info, 0, sizeof(target_info));
target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO;
@@ -2476,6 +2493,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
{
WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr));
d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device);
+ if (state->implicit_root_signature)
+ d3d12_root_signature_Release(state->implicit_root_signature);
return hr;
}
@@ -2483,6 +2502,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st
{
VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL));
d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device);
+ if (state->implicit_root_signature)
+ d3d12_root_signature_Release(state->implicit_root_signature);
return hr;
}
@@ -3484,6 +3505,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
goto fail;
state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
+ state->implicit_root_signature = NULL;
d3d12_device_add_ref(state->device = device);
return S_OK;
diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
index 14c8eb54574..9f5f759667a 100644
--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
@@ -1213,6 +1213,7 @@ struct d3d12_pipeline_state
struct d3d12_pipeline_uav_counter_state uav_counters;
+ ID3D12RootSignature *implicit_root_signature;
struct d3d12_device *device;
struct vkd3d_private_store private_store;
@@ -1927,4 +1928,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure)
vkd3d_header->next = vkd3d_structure;
}
+struct vkd3d_shader_cache;
+
+int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache);
+unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache);
+unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache);
+
#endif /* __VKD3D_PRIVATE_H */
--
2.43.0