wine-staging/patches/vkd3d-latest/0006-Updated-vkd3d-to-5827197246214a3b1a362f19a0ac4de426e.patch

From f8ff05b86acf4c5b18f389ae877ce138cb00e7d6 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Sat, 14 Dec 2024 11:00:37 +1100
Subject: [PATCH] Updated vkd3d to 5827197246214a3b1a362f19a0ac4de426e4a3e2.
---
libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 32 +-
libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 +-
libs/vkd3d/libs/vkd3d-shader/glsl.c | 2 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 79 ++-
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 63 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 110 ++--
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 592 ++++++++++++------
.../libs/vkd3d-shader/hlsl_constant_ops.c | 149 ++++-
libs/vkd3d/libs/vkd3d-shader/ir.c | 50 +-
libs/vkd3d/libs/vkd3d-shader/msl.c | 2 +-
libs/vkd3d/libs/vkd3d-shader/spirv.c | 2 +-
libs/vkd3d/libs/vkd3d-shader/tpf.c | 2 +-
.../libs/vkd3d-shader/vkd3d_shader_private.h | 5 +-
13 files changed, 766 insertions(+), 324 deletions(-)
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index e7dd65d1fef..fbd5d7ffbd7 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -633,7 +633,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output,
return;
}
+ /* Normally VSIR mandates that the register mask is a subset of the usage
+ * mask, and the usage mask is a subset of the signature mask. This
+ * doesn't always happen with SM1-3 registers, because of the limited
+ * flexibility with expressing swizzles.
+ *
+ * For example it's easy to find shaders like this:
+ * ps_3_0
+ * [...]
+ * dcl_texcoord0 v0
+ * [...]
+ * texld r2.xyzw, v0.xyzw, s1.xyzw
+ * [...]
+ *
+ * The dcl_texcoord0 instruction secretly has a .xy mask, which is used to
+ * compute the signature mask, but the texld instruction apparently uses all
+ * the components. Of course the last two components are ignored, but
+ * formally they seem to be used. So we end up with a signature element with
+ * mask .xy and usage mask .xyzw.
+ *
+ * In order to avoid this problem, when generating VSIR code with SM4
+ * normalisation level we remove the unused components in the write mask. We
+ * don't do that when targeting the SM1 normalisation level (i.e., when
+ * disassembling) so as to generate the same disassembly code as native. */
element->used_mask |= mask;
+ if (program->normalisation_level >= VSIR_NORMALISED_SM4)
+ element->used_mask &= element->mask;
}
static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1,
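To make the mask arithmetic above concrete, a sketch of the ps_3_0 example from the comment (an editor's illustration, not part of the patch):

    /* Sketch: element->mask      = 0x3 (.xy),   from dcl_texcoord0;
     *         element->used_mask = 0xf (.xyzw), accumulated from texld v0.xyzw. */
    if (program->normalisation_level >= VSIR_NORMALISED_SM4)
        element->used_mask &= element->mask; /* 0xf & 0x3 == 0x3, i.e. .xy */
    /* At VSIR_NORMALISED_SM1 (disassembly) the .xyzw usage mask is kept,
     * matching native disassembly output. */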
@@ -1265,6 +1290,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
{
const struct vkd3d_shader_location location = {.source_name = compile_info->source_name};
+ enum vsir_normalisation_level normalisation_level;
const uint32_t *code = compile_info->source.code;
size_t code_size = compile_info->source.size;
struct vkd3d_shader_version version;
@@ -1315,9 +1341,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st
sm1->start = &code[1];
sm1->end = &code[token_count];
+ normalisation_level = VSIR_NORMALISED_SM1;
+ if (compile_info->target_type != VKD3D_SHADER_TARGET_D3D_ASM)
+ normalisation_level = VSIR_NORMALISED_SM4;
+
/* Estimate instruction count to avoid reallocation in most shaders. */
if (!vsir_program_init(program, compile_info, &version,
- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level))
return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name);
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index d76f9bcc772..4493602dfb7 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -10356,7 +10356,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
/* Estimate instruction count to avoid reallocation in most shaders. */
count = max(token_count, 400) - 400;
if (!vsir_program_init(program, compile_info, &version,
- (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO))
+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6))
return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name);
sm6->ptr = &sm6->start[1];
diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
index 113c7eee65f..ab6604bd703 100644
--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
@@ -2469,7 +2469,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags,
if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0)
return ret;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
vkd3d_glsl_generator_init(&generator, program, compile_info,
descriptor_info, combined_sampler_info, message_context);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index e7518404aa0..84da2fcbc9f 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -1854,22 +1854,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct
return &store->node;
}
-struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components,
+struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count,
struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_swizzle *swizzle;
struct hlsl_type *type;
+ VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR);
+
if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle))))
return NULL;
- VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type));
- if (components == 1)
+ if (component_count > 1)
+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count);
+ else
type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type);
+ init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc);
+ hlsl_src_from_node(&swizzle->val, val);
+ swizzle->u.vector = s;
+
+ return &swizzle->node;
+}
+
+struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s,
+ unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_swizzle *swizzle;
+ struct hlsl_type *type;
+
+ VKD3D_ASSERT(val->data_type->class == HLSL_CLASS_MATRIX);
+
+ if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle))))
+ return NULL;
+ if (component_count > 1)
+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count);
else
- type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components);
+ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type);
init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc);
hlsl_src_from_node(&swizzle->val, val);
- swizzle->swizzle = s;
+ swizzle->u.matrix = s;
+
return &swizzle->node;
}
@@ -2064,8 +2087,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type
return &jump->node;
}
-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type,
+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter,
+ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type,
unsigned int unroll_limit, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_loop *loop;
@@ -2076,6 +2099,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
hlsl_block_init(&loop->body);
hlsl_block_add_block(&loop->body, block);
+ hlsl_block_init(&loop->iter);
+ if (iter)
+ hlsl_block_add_block(&loop->iter, iter);
+
loop->unroll_type = unroll_type;
loop->unroll_limit = unroll_limit;
return &loop->node;
@@ -2231,14 +2258,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_
static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src)
{
+ struct hlsl_block iter, body;
struct hlsl_ir_node *dst;
- struct hlsl_block body;
+
+ if (!clone_block(ctx, &iter, &src->iter, map))
+ return NULL;
if (!clone_block(ctx, &body, &src->body, map))
+ {
+ hlsl_block_cleanup(&iter);
return NULL;
+ }
- if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc)))
+ if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc)))
{
+ hlsl_block_cleanup(&iter);
hlsl_block_cleanup(&body);
return NULL;
}
@@ -2320,8 +2354,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr
static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx,
struct clone_instr_map *map, struct hlsl_ir_swizzle *src)
{
- return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx,
- map_instr(map, src->val.node), &src->node.loc);
+ if (src->val.node->data_type->class == HLSL_CLASS_MATRIX)
+ return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->dimx,
+ map_instr(map, src->val.node), &src->node.loc);
+ else
+ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->dimx,
+ map_instr(map, src->val.node), &src->node.loc);
}
static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map,
@@ -3401,11 +3439,12 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls
{
vkd3d_string_buffer_printf(buffer, ".");
for (i = 0; i < swizzle->node.data_type->dimx; ++i)
- vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf);
+ vkd3d_string_buffer_printf(buffer, "_m%u%u",
+ swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x);
}
else
{
- vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx));
+ vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->dimx));
}
}
@@ -3713,6 +3752,7 @@ static void free_ir_load(struct hlsl_ir_load *load)
static void free_ir_loop(struct hlsl_ir_loop *loop)
{
hlsl_block_cleanup(&loop->body);
+ hlsl_block_cleanup(&loop->iter);
vkd3d_free(loop);
}
@@ -3967,8 +4007,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function
uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask)
{
+ unsigned int src_component = 0;
uint32_t ret = 0;
- unsigned int i;
/* Leave replicate swizzles alone; some instructions need them. */
if (swizzle == HLSL_SWIZZLE(X, X, X, X)
@@ -3977,13 +4017,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask)
|| swizzle == HLSL_SWIZZLE(W, W, W, W))
return swizzle;
- for (i = 0; i < 4; ++i)
+ for (unsigned int dst_component = 0; dst_component < 4; ++dst_component)
{
- if (writemask & (1 << i))
- {
- ret |= (swizzle & 3) << (i * 2);
- swizzle >>= 2;
- }
+ if (writemask & (1 << dst_component))
+ hlsl_swizzle_set_component(&ret, dst_component, hlsl_swizzle_get_component(swizzle, src_component++));
}
return ret;
}
@@ -4036,7 +4073,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim
for (i = 0; i < dim; ++i)
{
unsigned int s = hlsl_swizzle_get_component(second, i);
- ret |= hlsl_swizzle_get_component(first, s) << HLSL_SWIZZLE_SHIFT(i);
+ hlsl_swizzle_set_component(&ret, i, hlsl_swizzle_get_component(first, s));
}
return ret;
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index 5f05ceda004..3c0bbf0a3e2 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -50,31 +50,17 @@
* DEALINGS IN THE SOFTWARE.
*/
-#define HLSL_SWIZZLE_X (0u)
-#define HLSL_SWIZZLE_Y (1u)
-#define HLSL_SWIZZLE_Z (2u)
-#define HLSL_SWIZZLE_W (3u)
-
-#define HLSL_SWIZZLE(x, y, z, w) \
- (((HLSL_SWIZZLE_ ## x) << 0) \
- | ((HLSL_SWIZZLE_ ## y) << 2) \
- | ((HLSL_SWIZZLE_ ## z) << 4) \
- | ((HLSL_SWIZZLE_ ## w) << 6))
-
-#define HLSL_SWIZZLE_MASK (0x3u)
-#define HLSL_SWIZZLE_SHIFT(idx) (2u * (idx))
+#define HLSL_SWIZZLE VKD3D_SHADER_SWIZZLE
static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned int idx)
{
- return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK;
+ return vsir_swizzle_get_component(swizzle, idx);
}
-static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle)
+static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component)
{
- return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0),
- hlsl_swizzle_get_component(swizzle, 1),
- hlsl_swizzle_get_component(swizzle, 2),
- hlsl_swizzle_get_component(swizzle, 3));
+ *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx));
+ *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx);
}
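A small sketch of the encoding change (illustrative; it assumes the public VKD3D_SHADER_SWIZZLE macros from vkd3d_shader.h, which pack one byte per component rather than HLSL's old two bits):

    /* Build .yxzw in the shared encoding, read a component back, then
     * patch component 0 to Z. No vsir_swizzle_from_hlsl() conversion is
     * needed any more, since both sides now share one encoding. */
    uint32_t swizzle = HLSL_SWIZZLE(Y, X, Z, W);
    unsigned int c = hlsl_swizzle_get_component(swizzle, 0); /* 1, i.e. Y */
    hlsl_swizzle_set_component(&swizzle, 0, 2);              /* 2 is Z: .zxzw */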
enum hlsl_type_class
@@ -659,21 +645,30 @@ struct hlsl_ir_if
struct hlsl_block else_block;
};
-enum hlsl_ir_loop_unroll_type
+enum hlsl_loop_unroll_type
+{
+ HLSL_LOOP_UNROLL,
+ HLSL_LOOP_FORCE_UNROLL,
+ HLSL_LOOP_FORCE_LOOP
+};
+
+enum hlsl_loop_type
{
- HLSL_IR_LOOP_UNROLL,
- HLSL_IR_LOOP_FORCE_UNROLL,
- HLSL_IR_LOOP_FORCE_LOOP
+ HLSL_LOOP_FOR,
+ HLSL_LOOP_WHILE,
+ HLSL_LOOP_DO_WHILE
};
struct hlsl_ir_loop
{
struct hlsl_ir_node node;
+ struct hlsl_block iter;
/* loop condition is stored in the body (as "if (!condition) break;") */
struct hlsl_block body;
+ enum hlsl_loop_type type;
unsigned int next_index; /* liveness index of the end of the loop */
unsigned int unroll_limit;
- enum hlsl_ir_loop_unroll_type unroll_type;
+ enum hlsl_loop_unroll_type unroll_type;
};
struct hlsl_ir_switch_case
@@ -793,7 +788,17 @@ struct hlsl_ir_swizzle
{
struct hlsl_ir_node node;
struct hlsl_src val;
- uint32_t swizzle;
+ union
+ {
+ uint32_t vector;
+ struct hlsl_matrix_swizzle
+ {
+ struct
+ {
+ uint8_t x, y;
+ } components[4];
+ } matrix;
+ } u;
};
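A sketch of the new matrix representation (row/column roles inferred from the dump and lowering code elsewhere in this patch, where y holds the row and x the column):

    /* ._m01_m10 stored in the new struct: component 0 reads row 0,
     * column 1; component 1 reads row 1, column 0. */
    struct hlsl_matrix_swizzle s =
    {
        .components = {{.x = 1, .y = 0}, {.x = 0, .y = 1}},
    };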
struct hlsl_ir_index
@@ -1550,8 +1555,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty
struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val,
struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx,
- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter,
+ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type,
+ unsigned int unroll_limit, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s,
+ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx,
const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource,
@@ -1642,6 +1650,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere
bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block);
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
+bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
struct hlsl_block *block, void *context);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index afa41f4b1c2..ce9f7fd6a77 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -555,13 +555,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co
return true;
}
-enum loop_type
-{
- LOOP_FOR,
- LOOP_WHILE,
- LOOP_DO_WHILE
-};
-
static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs)
{
unsigned int i, j;
@@ -577,8 +570,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru
}
}
-static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type,
- struct hlsl_block *cond, struct hlsl_block *iter)
+static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ enum hlsl_loop_type type, struct hlsl_block *cond)
{
struct hlsl_ir_node *instr, *next;
@@ -588,8 +581,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block
{
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- resolve_loop_continue(ctx, &iff->then_block, type, cond, iter);
- resolve_loop_continue(ctx, &iff->else_block, type, cond, iter);
+ resolve_loop_continue(ctx, &iff->then_block, type, cond);
+ resolve_loop_continue(ctx, &iff->else_block, type, cond);
}
else if (instr->type == HLSL_IR_JUMP)
{
@@ -599,7 +592,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block
if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
continue;
- if (type == LOOP_DO_WHILE)
+ if (type == HLSL_LOOP_DO_WHILE)
{
if (!hlsl_clone_block(ctx, &cond_block, cond))
return;
@@ -610,13 +603,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block
}
list_move_before(&instr->entry, &cond_block.instrs);
}
- else if (type == LOOP_FOR)
- {
- if (!hlsl_clone_block(ctx, &cond_block, iter))
- return;
- list_move_before(&instr->entry, &cond_block.instrs);
- }
- jump->type = HLSL_IR_JUMP_CONTINUE;
}
}
}
@@ -740,11 +726,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str
return res.number.u;
}
-static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
+static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type,
const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond,
struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc)
{
- enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL;
+ enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL;
unsigned int i, unroll_limit = 0;
struct hlsl_ir_node *loop;
@@ -775,11 +761,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
hlsl_block_cleanup(&expr);
}
- unroll_type = HLSL_IR_LOOP_FORCE_UNROLL;
+ unroll_type = HLSL_LOOP_FORCE_UNROLL;
}
else if (!strcmp(attr->name, "loop"))
{
- unroll_type = HLSL_IR_LOOP_FORCE_LOOP;
+ unroll_type = HLSL_LOOP_FORCE_LOOP;
}
else if (!strcmp(attr->name, "fastopt")
|| !strcmp(attr->name, "allow_uav_condition"))
@@ -792,7 +778,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
}
}
- resolve_loop_continue(ctx, body, type, cond, iter);
+ resolve_loop_continue(ctx, body, type, cond);
if (!init && !(init = make_empty_block(ctx)))
goto oom;
@@ -800,15 +786,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type,
if (!append_conditional_break(ctx, cond))
goto oom;
- if (iter)
- hlsl_block_add_block(body, iter);
-
- if (type == LOOP_DO_WHILE)
+ if (type == HLSL_LOOP_DO_WHILE)
list_move_tail(&body->instrs, &cond->instrs);
else
list_move_head(&body->instrs, &cond->instrs);
- if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc)))
+ if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc)))
goto oom;
hlsl_block_add_instr(init, loop);
@@ -862,6 +845,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod
if (value->data_type->class == HLSL_CLASS_MATRIX)
{
/* Matrix swizzle */
+ struct hlsl_matrix_swizzle s;
bool m_swizzle;
unsigned int inc, x, y;
@@ -892,10 +876,11 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod
if (x >= value->data_type->dimx || y >= value->data_type->dimy)
return NULL;
- swiz |= (y << 4 | x) << component * 8;
+ s.components[component].x = x;
+ s.components[component].y = y;
component++;
}
- return hlsl_new_swizzle(ctx, swiz, component, value, loc);
+ return hlsl_new_matrix_swizzle(ctx, s, component, value, loc);
}
/* Vector swizzle */
@@ -924,8 +909,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod
if (s >= value->data_type->dimx)
return NULL;
- swiz |= s << component * 2;
- component++;
+ hlsl_swizzle_set_component(&swiz, component++, s);
}
if (valid)
return hlsl_new_swizzle(ctx, swiz, component, value, loc);
@@ -2102,8 +2086,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
{
if (*writemask & (1 << i))
{
- unsigned int s = (*swizzle >> (i * 2)) & 3;
- new_swizzle |= s << (bit++ * 2);
+ unsigned int s = hlsl_swizzle_get_component(*swizzle, i);
+ hlsl_swizzle_set_component(&new_swizzle, bit++, s);
if (new_writemask & (1 << s))
return false;
new_writemask |= 1 << s;
@@ -2117,9 +2101,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
{
for (j = 0; j < width; ++j)
{
- unsigned int s = (new_swizzle >> (j * 2)) & 3;
+ unsigned int s = hlsl_swizzle_get_component(new_swizzle, j);
if (s == i)
- inverted |= j << (bit++ * 2);
+ hlsl_swizzle_set_component(&inverted, bit++, j);
}
}
@@ -2129,22 +2113,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
return true;
}
-static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width)
+static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle,
+ uint32_t *ret_inverted, unsigned int *writemask, unsigned int *ret_width)
{
- /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y.
- * components are indexed by their sources. i.e. the first component comes from the first
- * component of the rhs. */
- unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0;
+ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0;
+ struct hlsl_matrix_swizzle new_swizzle = {0};
/* First, we filter the swizzle to remove components that aren't enabled by writemask. */
for (i = 0; i < 4; ++i)
{
if (*writemask & (1 << i))
{
- unsigned int s = (*swizzle >> (i * 8)) & 0xff;
- unsigned int x = s & 0xf, y = (s >> 4) & 0xf;
+ unsigned int x = swizzle->components[i].x;
+ unsigned int y = swizzle->components[i].y;
unsigned int idx = x + y * 4;
- new_swizzle |= s << (bit++ * 8);
+
+ new_swizzle.components[bit++] = swizzle->components[i];
if (new_writemask & (1 << idx))
return false;
new_writemask |= 1 << idx;
@@ -2152,22 +2136,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un
}
width = bit;
- /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the
- * incoming vector. */
+ /* Then we invert the swizzle. The resulting swizzle uses a uint32_t
+ * vector format, because it's for the incoming vector. */
bit = 0;
for (i = 0; i < 16; ++i)
{
for (j = 0; j < width; ++j)
{
- unsigned int s = (new_swizzle >> (j * 8)) & 0xff;
- unsigned int x = s & 0xf, y = (s >> 4) & 0xf;
+ unsigned int x = new_swizzle.components[j].x;
+ unsigned int y = new_swizzle.components[j].y;
unsigned int idx = x + y * 4;
if (idx == i)
- inverted |= j << (bit++ * 2);
+ hlsl_swizzle_set_component(&inverted, bit++, j);
}
}
- *swizzle = inverted;
+ *ret_inverted = inverted;
*writemask = new_writemask;
*ret_width = width;
return true;
@@ -2221,28 +2205,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc
{
struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs);
struct hlsl_ir_node *new_swizzle;
- uint32_t s = swizzle->swizzle;
+ uint32_t s;
VKD3D_ASSERT(!matrix_writemask);
if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX)
{
+ struct hlsl_matrix_swizzle ms = swizzle->u.matrix;
+
if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX)
{
hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle.");
return false;
}
- if (!invert_swizzle_matrix(&s, &writemask, &width))
+ if (!invert_swizzle_matrix(&ms, &s, &writemask, &width))
{
hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix.");
return false;
}
matrix_writemask = true;
}
- else if (!invert_swizzle(&s, &writemask, &width))
+ else
{
- hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask.");
- return false;
+ s = swizzle->u.vector;
+ if (!invert_swizzle(&s, &writemask, &width))
+ {
+ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask.");
+ return false;
+ }
}
if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc)))
@@ -8831,25 +8821,25 @@ if_body:
loop_statement:
attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement
{
- $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
| attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';'
{
- $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
| attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement
{
- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
| attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement
{
- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
+ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3);
hlsl_pop_scope(ctx);
cleanup_parse_attribute_list(&$1);
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index e6924aa70ef..c3c8e5d55b3 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -1076,7 +1076,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
struct hlsl_deref var_deref;
struct hlsl_type *matrix_type;
struct hlsl_ir_var *var;
- unsigned int x, y, k, i;
+ unsigned int k, i;
if (instr->type != HLSL_IR_SWIZZLE)
return false;
@@ -1094,9 +1094,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
struct hlsl_block store_block;
struct hlsl_ir_node *load;
- y = (swizzle->swizzle >> (8 * i + 4)) & 0xf;
- x = (swizzle->swizzle >> 8 * i) & 0xf;
- k = y * matrix_type->dimx + x;
+ k = swizzle->u.matrix.components[i].y * matrix_type->dimx + swizzle->u.matrix.components[i].x;
if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc)))
return false;
@@ -1359,8 +1357,10 @@ struct copy_propagation_var_def
struct copy_propagation_state
{
- struct rb_tree var_defs;
- struct copy_propagation_state *parent;
+ struct rb_tree *scope_var_defs;
+ size_t scope_count, scopes_capacity;
+ struct hlsl_ir_node *stop;
+ bool stopped;
};
static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry)
@@ -1382,6 +1382,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte
vkd3d_free(var_def);
}
+static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx)
+{
+ if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity,
+ state->scope_count + 1, sizeof(*state->scope_var_defs))))
+ return false;
+
+ rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare);
+
+ return state->scope_count;
+}
+
+static size_t copy_propagation_pop_scope(struct copy_propagation_state *state)
+{
+ rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL);
+
+ return state->scope_count;
+}
+
+static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx)
+{
+ memset(state, 0, sizeof(*state));
+
+ return copy_propagation_push_scope(state, ctx);
+}
+
+static void copy_propagation_state_destroy(struct copy_propagation_state *state)
+{
+ while (copy_propagation_pop_scope(state));
+
+ vkd3d_free(state->scope_var_defs);
+}
+
static struct copy_propagation_value *copy_propagation_get_value_at_time(
struct copy_propagation_component_trace *trace, unsigned int time)
{
@@ -1399,9 +1431,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time(
static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state,
const struct hlsl_ir_var *var, unsigned int component, unsigned int time)
{
- for (; state; state = state->parent)
+ for (size_t i = state->scope_count - 1; i < state->scope_count; i--)
{
- struct rb_entry *entry = rb_get(&state->var_defs, var);
+ struct rb_tree *tree = &state->scope_var_defs[i];
+ struct rb_entry *entry = rb_get(tree, var);
if (entry)
{
struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
@@ -1427,7 +1460,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co
static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx,
struct copy_propagation_state *state, struct hlsl_ir_var *var)
{
- struct rb_entry *entry = rb_get(&state->var_defs, var);
+ struct rb_tree *tree = &state->scope_var_defs[state->scope_count - 1];
+ struct rb_entry *entry = rb_get(tree, var);
struct copy_propagation_var_def *var_def;
unsigned int component_count = hlsl_type_component_count(var->data_type);
int res;
@@ -1440,7 +1474,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h
var_def->var = var;
- res = rb_put(&state->var_defs, var, &var_def->entry);
+ res = rb_put(tree, var, &var_def->entry);
VKD3D_ASSERT(!res);
return var_def;
@@ -1597,7 +1631,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx,
var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count));
return false;
}
- ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i);
+ hlsl_swizzle_set_component(&ret_swizzle, i, value->component);
}
TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n",
@@ -1721,10 +1755,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx,
return false;
load = hlsl_ir_load(swizzle->val.node);
- if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node))
+ if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node))
return true;
- if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node))
+ if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node))
return true;
return false;
@@ -1820,18 +1854,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s
}
}
-static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
- struct copy_propagation_state *parent)
-{
- rb_init(&state->var_defs, copy_propagation_var_def_compare);
- state->parent = parent;
-}
-
-static void copy_propagation_state_destroy(struct copy_propagation_state *state)
-{
- rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL);
-}
-
static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
struct hlsl_block *block, unsigned int time)
{
@@ -1900,16 +1922,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff,
struct copy_propagation_state *state)
{
- struct copy_propagation_state inner_state;
bool progress = false;
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &iff->then_block, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &iff->else_block, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
/* Ideally we'd invalidate the outer state looking at what was
* touched in the two inner states, but this doesn't work for
@@ -1924,14 +1949,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if
static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop,
struct copy_propagation_state *state)
{
- struct copy_propagation_state inner_state;
bool progress = false;
copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index);
+ copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index);
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &loop->body, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
return progress;
}
@@ -1939,15 +1966,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l
static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s,
struct copy_propagation_state *state)
{
- struct copy_propagation_state inner_state;
struct hlsl_ir_switch_case *c;
bool progress = false;
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
{
- copy_propagation_state_init(ctx, &inner_state, state);
- progress |= copy_propagation_transform_block(ctx, &c->body, &inner_state);
- copy_propagation_state_destroy(&inner_state);
+ copy_propagation_push_scope(state, ctx);
+ progress |= copy_propagation_transform_block(ctx, &c->body, state);
+ if (state->stopped)
+ return progress;
+ copy_propagation_pop_scope(state);
}
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
@@ -1966,6 +1994,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
{
+ if (instr == state->stop)
+ {
+ state->stopped = true;
+ return progress;
+ }
+
switch (instr->type)
{
case HLSL_IR_LOAD:
@@ -2003,6 +2037,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
default:
break;
}
+
+ if (state->stopped)
+ return progress;
}
return progress;
@@ -2015,7 +2052,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc
index_instructions(block, 2);
- copy_propagation_state_init(ctx, &state, NULL);
+ copy_propagation_state_init(&state, ctx);
progress = copy_propagation_transform_block(ctx, block, &state);
@@ -2403,8 +2440,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
struct hlsl_ir_node *new_swizzle;
uint32_t combined_swizzle;
- combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle,
- swizzle->swizzle, instr->data_type->dimx);
+ combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector,
+ swizzle->u.vector, instr->data_type->dimx);
next_instr = hlsl_ir_swizzle(next_instr)->val.node;
if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc)))
@@ -2431,7 +2468,7 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i
return false;
for (i = 0; i < instr->data_type->dimx; ++i)
- if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i)
+ if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i)
return false;
hlsl_replace_node(instr, swizzle->val.node);
@@ -6569,6 +6606,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
{
progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL);
+ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL);
progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL);
progress |= hlsl_copy_propagation_execute(ctx, body);
progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
@@ -6786,7 +6824,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d
swizzle = hlsl_swizzle_from_writemask(src_writemask);
swizzle = hlsl_map_swizzle(swizzle, dst_writemask);
- swizzle = vsir_swizzle_from_hlsl(swizzle);
return swizzle;
}
@@ -7855,9 +7892,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
dst_param->write_mask = instr->reg.writemask;
swizzle = hlsl_swizzle_from_writemask(val->reg.writemask);
- swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx);
+ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->dimx);
swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask);
- swizzle = vsir_swizzle_from_hlsl(swizzle);
src_param = &ins->src[0];
VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT);
@@ -8015,7 +8051,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
version.major = ctx->profile->major_version;
version.minor = ctx->profile->minor_version;
version.type = ctx->profile->type;
- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
{
ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
return;
@@ -9886,7 +9922,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
version.minor = ctx->profile->minor_version;
version.type = ctx->profile->type;
- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
{
ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
return;
@@ -9951,39 +9987,129 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
generate_vsir_scan_global_flags(ctx, program, func);
}
-static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point,
- struct hlsl_block **found_block)
+static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
+ bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc)
{
- struct hlsl_ir_node *node;
+ struct hlsl_ir_node *const_node, *store;
- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
+ if (!(const_node = hlsl_new_bool_constant(ctx, val, loc)))
+ return false;
+ hlsl_block_add_instr(block, const_node);
+
+ if (!(store = hlsl_new_simple_store(ctx, var, const_node)))
+ return false;
+ hlsl_block_add_instr(block, store);
+
+ return true;
+}
+
+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued);
+
+static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+ struct hlsl_ir_jump *jump;
+ struct hlsl_ir_var *var;
+ struct hlsl_block draft;
+ struct hlsl_ir_if *iff;
+
+ if (node->type == HLSL_IR_IF)
{
- if (node == stop_point)
- return NULL;
+ iff = hlsl_ir_if(node);
+ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued))
+ return true;
+ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued))
+ return true;
+ return false;
+ }
- if (node->type == HLSL_IR_IF)
- {
- struct hlsl_ir_if *iff = hlsl_ir_if(node);
- struct hlsl_ir_jump *jump = NULL;
+ if (node->type == HLSL_IR_JUMP)
+ {
+ jump = hlsl_ir_jump(node);
+ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK)
+ return false;
- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block)))
- return jump;
- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block)))
- return jump;
- }
- else if (node->type == HLSL_IR_JUMP)
- {
- struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
+ hlsl_block_init(&draft);
- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE)
- {
- *found_block = block;
- return jump;
- }
- }
+ if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
+ var = loop_continued;
+ else
+ var = loop_broken;
+
+ if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc))
+ return false;
+
+ list_move_before(&jump->node.entry, &draft.instrs);
+ list_remove(&jump->node.entry);
+ hlsl_free_instr(&jump->node);
+
+ return true;
}
- return NULL;
+ return false;
+}
+
+static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx,
+ struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_node *cond, *iff;
+ struct hlsl_block then_block;
+ struct hlsl_ir_load *load;
+
+ hlsl_block_init(&then_block);
+
+ if (!(load = hlsl_new_var_load(ctx, var, loc)))
+ return NULL;
+ hlsl_block_add_instr(dst, &load->node);
+
+ if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc)))
+ return NULL;
+ hlsl_block_add_instr(dst, cond);
+
+ if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
+ return NULL;
+ hlsl_block_add_instr(dst, iff);
+
+ return hlsl_ir_if(iff);
+}
+
+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+ struct hlsl_ir_node *node, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry)
+ {
+ struct hlsl_ir_if *broken_check, *continued_check;
+ struct hlsl_block draft;
+
+ if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued))
+ continue;
+
+ if (&next->entry == &block->instrs)
+ return true;
+
+ hlsl_block_init(&draft);
+
+ broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc);
+ continued_check = loop_unrolling_generate_var_check(ctx,
+ &broken_check->then_block, loop_continued, &next->loc);
+
+ list_move_before(&next->entry, &draft.instrs);
+
+ list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs));
+
+ return true;
+ }
+
+ return false;
+}
+
+static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
+{
+ while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued));
}
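A before/after sketch of what this jump-removal pass does to a loop body (editor's illustration, not part of the patch):

    /* Before:                     After:
     *     a = 1;                      a = 1;
     *     if (c)                      if (c)
     *         break;                      broken = true;
     *     b = 2;                      if (!broken)
     *                                 {
     *                                     if (!continued)
     *                                         b = 2;
     *                                 }
     * An unresolved "continue" is rewritten the same way, storing true
     * to "continued" instead of "broken". */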
static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop)
@@ -9993,7 +10119,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru
return loop->unroll_limit;
/* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */
- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
+ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL)
return 1024;
/* SM4 limits implicit unrolling to 254 iterations. */
@@ -10004,167 +10130,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru
return 1024;
}
-static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block,
- struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop)
+static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct copy_propagation_state *state, unsigned int *index)
{
- unsigned int max_iterations, i;
+ size_t scopes_depth = state->scope_count - 1;
+ unsigned int current_index;
+ bool progress;
+
+ do
+ {
+ state->stopped = false;
+ for (size_t i = state->scope_count; scopes_depth < i; --i)
+ copy_propagation_pop_scope(state);
+ copy_propagation_push_scope(state, ctx);
+
+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL);
+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL);
+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL);
+
+ current_index = index_instructions(block, *index);
+ progress |= copy_propagation_transform_block(ctx, block, state);
+
+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL);
+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL);
+ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL);
+ } while (progress);
+
+ *index = current_index;
+}
+
+static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var)
+{
+ struct copy_propagation_value *v;
+
+ if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX))
+ || v->node->type != HLSL_IR_CONSTANT)
+ return false;
+
+ return hlsl_ir_constant(v->node)->value.u[0].u;
+}
+
+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
+{
+ struct hlsl_block draft, tmp_dst, loop_body;
+ struct hlsl_ir_var *broken, *continued;
+ unsigned int max_iterations, i, index;
+ struct copy_propagation_state state;
+ struct hlsl_ir_if *target_if;
+
+ if (!(broken = hlsl_new_synthetic_var(ctx, "broken",
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
+ goto fail;
+
+ if (!(continued = hlsl_new_synthetic_var(ctx, "continued",
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
+ goto fail;
+
+ hlsl_block_init(&draft);
+ hlsl_block_init(&tmp_dst);
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
+ copy_propagation_state_init(&state, ctx);
+ index = 2;
+ state.stop = &loop->node;
+ loop_unrolling_simplify(ctx, block, &state, &index);
+ state.stopped = false;
+ index = loop->node.index;
+
+ if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc))
+ goto fail;
+ hlsl_block_add_block(&draft, &tmp_dst);
+
+ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
+ goto fail;
+ hlsl_block_add_block(&draft, &tmp_dst);
+
+ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
+ goto fail;
+ state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry);
+ hlsl_block_add_block(&draft, &tmp_dst);
+
+ copy_propagation_push_scope(&state, ctx);
+ loop_unrolling_simplify(ctx, &draft, &state, &index);
+
+ /* As an optimization, we only remove jumps from the loop's body once. */
+ if (!hlsl_clone_block(ctx, &loop_body, &loop->body))
+ goto fail;
+ loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued);
for (i = 0; i < max_iterations; ++i)
{
- struct hlsl_block tmp_dst, *jump_block;
- struct hlsl_ir_jump *jump = NULL;
+ copy_propagation_push_scope(&state, ctx);
- if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body))
- return false;
- list_move_before(&loop->node.entry, &tmp_dst.instrs);
- hlsl_block_cleanup(&tmp_dst);
+ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
+ goto fail;
+ hlsl_block_add_block(&target_if->then_block, &tmp_dst);
- hlsl_run_const_passes(ctx, block);
+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body))
+ goto fail;
+ hlsl_block_add_block(&target_if->then_block, &tmp_dst);
- if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block)))
- {
- enum hlsl_ir_jump_type type = jump->type;
+ loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index);
- if (jump_block != loop_parent)
- {
- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
- hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
- "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported.");
- return false;
- }
+ if (loop_unrolling_check_val(&state, broken))
+ break;
- list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry));
- hlsl_block_cleanup(&tmp_dst);
+ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
+ goto fail;
+ hlsl_block_add_block(&draft, &tmp_dst);
- if (type == HLSL_IR_JUMP_BREAK)
- break;
- }
- }
+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter))
+ goto fail;
+ hlsl_block_add_block(&target_if->then_block, &tmp_dst);
+ }
/* Native will not emit an error if max_iterations has been reached with an
* explicit limit. It also will not insert a loop if there are iterations left
* i.e. [unroll(4)] for (i = 0; i < 8; ++i). */
if (!loop->unroll_limit && i == max_iterations)
{
- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
+ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL)
hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
"Unable to unroll loop, maximum iterations reached (%u).", max_iterations);
- return false;
+ goto fail;
}
+ hlsl_block_cleanup(&loop_body);
+ copy_propagation_state_destroy(&state);
+
+ list_move_before(&loop->node.entry, &draft.instrs);
+ hlsl_block_cleanup(&draft);
list_remove(&loop->node.entry);
hlsl_free_instr(&loop->node);
return true;
+
+fail:
+ hlsl_block_cleanup(&loop_body);
+ copy_propagation_state_destroy(&state);
+ hlsl_block_cleanup(&draft);
+
+ return false;
}
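The overall shape of the code this function emits, sketched (editor's illustration):

    /* broken = false;
     * continued = false;
     * if (!broken) { continued = false; <body copy #1> }
     * if (!broken) { <iter>; continued = false; <body copy #2> }
     * if (!broken) { <iter>; continued = false; <body copy #3> }
     * ...
     * Each body copy has had break/continue rewritten into the bool
     * stores and guards above; constant folding and copy propagation
     * run after every copy, so a constant-true "broken" value stops
     * the unrolling early. */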
-/*
- * loop_unrolling_find_unrollable_loop() is not the normal way to do things;
- * normal passes simply iterate over the whole block and apply a transformation
- * to every relevant instruction. However, loop unrolling can fail, and we want
- * to leave the loop in its previous state in that case. That isn't a problem by
- * itself, except that loop unrolling needs copy-prop in order to work properly,
- * and copy-prop state at the time of the loop depends on the rest of the program
- * up to that point. This means we need to clone the whole program, and at that
- * point we have to search it again anyway to find the clone of the loop we were
- * going to unroll.
- *
- * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop
- * up until the loop instruction, clone just that loop, then use copyprop again
- * with the saved state after unrolling. However, copyprop currently isn't built
- * for that yet [notably, it still relies on indices]. Note also this still doesn't
- * really let us use transform_ir() anyway [since we don't have a good way to say
- * "copyprop from the beginning of the program up to the instruction we're
- * currently processing" from the callback]; we'd have to use a dedicated
- * recursive function instead. */
-static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block,
- struct hlsl_block **containing_block)
+static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
{
- struct hlsl_ir_node *instr;
+ struct hlsl_block *program = context;
+ struct hlsl_ir_loop *loop;
- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
+ if (node->type != HLSL_IR_LOOP)
+ return true;
+
+ loop = hlsl_ir_loop(node);
+
+ if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL)
+ return true;
+
+ if (!loop_unrolling_unroll_loop(ctx, program, loop))
+ loop->unroll_type = HLSL_LOOP_FORCE_LOOP;
+
+ return true;
+}
+
+/* We could handle this at parse time. However, loop unrolling often needs to
+ * know the value of variables modified in the "iter" block. It is possible to
+ * detect that all exit paths of a loop body modify such variables in the same
+ * way, but difficult, and d3dcompiler does not attempt to do so.
+ * In fact, d3dcompiler is capable of unrolling the following loop:
+ * for (int i = 0; i < 10; ++i)
+ * {
+ * if (some_uniform > 4)
+ * continue;
+ * }
+ * but cannot unroll the same loop with "++i" moved to each exit path:
+ * for (int i = 0; i < 10;)
+ * {
+ * if (some_uniform > 4)
+ * {
+ * ++i;
+ * continue;
+ * }
+ * ++i;
+ * }
+ */
+static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
+{
+ struct hlsl_ir_loop *loop;
+
+ if (node->type != HLSL_IR_LOOP)
+ return true;
+
+ loop = hlsl_ir_loop(node);
+
+ hlsl_block_add_block(&loop->body, &loop->iter);
+ return true;
+}
+
+static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop)
+{
+ struct hlsl_ir_node *node;
+
+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
{
- switch (instr->type)
+ switch (node->type)
{
case HLSL_IR_LOOP:
{
- struct hlsl_ir_loop *nested_loop;
- struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
-
- if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block)))
- return nested_loop;
-
- if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
- {
- *containing_block = block;
- return loop;
- }
+ struct hlsl_ir_loop *loop = hlsl_ir_loop(node);
+ resolve_continues(ctx, &loop->body, loop);
break;
}
case HLSL_IR_IF:
{
- struct hlsl_ir_loop *loop;
- struct hlsl_ir_if *iff = hlsl_ir_if(instr);
-
- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block)))
- return loop;
- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block)))
- return loop;
-
+ struct hlsl_ir_if *iff = hlsl_ir_if(node);
+ resolve_continues(ctx, &iff->then_block, last_loop);
+ resolve_continues(ctx, &iff->else_block, last_loop);
break;
}
case HLSL_IR_SWITCH:
{
- struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
+ struct hlsl_ir_switch *s = hlsl_ir_switch(node);
struct hlsl_ir_switch_case *c;
- struct hlsl_ir_loop *loop;
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
{
- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block)))
- return loop;
+ resolve_continues(ctx, &c->body, last_loop);
}
break;
}
+ case HLSL_IR_JUMP:
+ {
+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
+
+ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
+ break;
+
+ if (last_loop->type == HLSL_LOOP_FOR)
+ {
+ struct hlsl_block draft;
+
+ if (!hlsl_clone_block(ctx, &draft, &last_loop->iter))
+ return;
+
+ list_move_before(&node->entry, &draft.instrs);
+ hlsl_block_cleanup(&draft);
+ }
+
+ jump->type = HLSL_IR_JUMP_CONTINUE;
+ break;
+ }
default:
break;
}
}
-
- return NULL;
}
-static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block)
+static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
{
- while (true)
- {
- struct hlsl_block clone, *containing_block;
- struct hlsl_ir_loop *loop, *cloned_loop;
-
- if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block)))
- return;
-
- if (!hlsl_clone_block(ctx, &clone, block))
- return;
-
- cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block);
- VKD3D_ASSERT(cloned_loop);
+ bool progress;
- if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop))
- {
- hlsl_block_cleanup(&clone);
- loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP;
- continue;
- }
+ /* These are required by copy propagation, which in turn is required for
+ * unrolling. */
+ do
+ {
+ progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL);
+ progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL);
+ } while (progress);
+ hlsl_transform_ir(ctx, split_matrix_copies, block, NULL);
- hlsl_block_cleanup(block);
- hlsl_block_init(block);
- hlsl_block_add_block(block, &clone);
- }
+ hlsl_transform_ir(ctx, unroll_loops, block, block);
+ resolve_continues(ctx, block, NULL);
+ hlsl_transform_ir(ctx, resolve_loops, block, NULL);
}
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
@@ -10413,7 +10651,7 @@ static void process_entry_function(struct hlsl_ctx *ctx,
hlsl_transform_ir(ctx, lower_discard_nz, body, NULL);
}
- transform_unroll_loops(ctx, body);
+ loop_unrolling_execute(ctx, body);
hlsl_run_const_passes(ctx, body);
remove_unreachable_code(ctx, body);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
index 716adb15f08..cd7cd2fe6a3 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
@@ -220,7 +220,9 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
break;
case HLSL_TYPE_BOOL:
- /* Casts to bool should have already been lowered. */
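+            /* Booleans are canonically stored as ~0u / 0u. */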
+ dst->u[k].u = u ? ~0u : 0u;
+ break;
+
default:
vkd3d_unreachable();
}
@@ -1544,6 +1546,149 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
return false;
}
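+/* Reassociation is only safe where the operation is truly associative:
+ * integer ADD and MUL qualify, but their floating-point variants do not,
+ * because rounding makes the result depend on evaluation order. */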
+static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type)
+{
+ switch (op)
+ {
+ case HLSL_OP2_ADD:
+ case HLSL_OP2_MUL:
+ return type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT;
+
+ case HLSL_OP2_BIT_AND:
+ case HLSL_OP2_BIT_OR:
+ case HLSL_OP2_BIT_XOR:
+ case HLSL_OP2_LOGIC_AND:
+ case HLSL_OP2_LOGIC_OR:
+ case HLSL_OP2_MAX:
+ case HLSL_OP2_MIN:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
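+/* Swapping operands, unlike reassociation, is safe for floating-point types
+ * too, so ADD, MUL and DOT count as commutative for every numeric type. */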
+static bool is_op_commutative(enum hlsl_ir_expr_op op)
+{
+ switch (op)
+ {
+ case HLSL_OP2_ADD:
+ case HLSL_OP2_BIT_AND:
+ case HLSL_OP2_BIT_OR:
+ case HLSL_OP2_BIT_XOR:
+ case HLSL_OP2_DOT:
+ case HLSL_OP2_LOGIC_AND:
+ case HLSL_OP2_LOGIC_OR:
+ case HLSL_OP2_MAX:
+ case HLSL_OP2_MIN:
+ case HLSL_OP2_MUL:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
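+/* Rewrite binary expressions into a canonical shape: for commutative
+ * operators, constants are moved to the right-hand side; for associative
+ * ones, chains are reassociated so that constants end up adjacent, e.g.
+ * (x + 1) + 2 -> x + (1 + 2), where constant folding can collapse them. */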
+bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+    struct hlsl_ir_node *arg1, *arg2;
+ struct hlsl_ir_expr *expr;
+ enum hlsl_base_type type;
+ enum hlsl_ir_expr_op op;
+ bool progress = false;
+
+ if (instr->type != HLSL_IR_EXPR)
+ return false;
+ expr = hlsl_ir_expr(instr);
+
+ if (instr->data_type->class > HLSL_CLASS_VECTOR)
+ return false;
+
+ arg1 = expr->operands[0].node;
+ arg2 = expr->operands[1].node;
+ type = instr->data_type->e.numeric.type;
+ op = expr->op;
+
+ if (!arg1 || !arg2)
+ return false;
+
+ if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT)
+ {
+ /* a OP x -> x OP a */
+ struct hlsl_ir_node *tmp = arg1;
+
+ arg1 = arg2;
+ arg2 = tmp;
+ progress = true;
+ }
+
+ if (is_op_associative(op, type))
+ {
+ struct hlsl_ir_expr *e1 = arg1->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg1) : NULL;
+ struct hlsl_ir_expr *e2 = arg2->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg2) : NULL;
+
+ if (e1 && e1->op == op && e1->operands[0].node->type != HLSL_IR_CONSTANT
+ && e1->operands[1].node->type == HLSL_IR_CONSTANT)
+ {
+ if (arg2->type == HLSL_IR_CONSTANT)
+ {
+ /* (x OP a) OP b -> x OP (a OP b) */
+ struct hlsl_ir_node *ab;
+
+ if (!(ab = hlsl_new_binary_expr(ctx, op, e1->operands[1].node, arg2)))
+ return false;
+ list_add_before(&instr->entry, &ab->entry);
+
+ arg1 = e1->operands[0].node;
+ arg2 = ab;
+ progress = true;
+ }
+ else if (is_op_commutative(op))
+ {
+ /* (x OP a) OP y -> (x OP y) OP a */
+ struct hlsl_ir_node *xy;
+
+ if (!(xy = hlsl_new_binary_expr(ctx, op, e1->operands[0].node, arg2)))
+ return false;
+ list_add_before(&instr->entry, &xy->entry);
+
+ arg1 = xy;
+ arg2 = e1->operands[1].node;
+ progress = true;
+ }
+ }
+
+ if (!progress && arg1->type != HLSL_IR_CONSTANT && e2 && e2->op == op
+ && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT)
+ {
+ /* x OP (y OP a) -> (x OP y) OP a */
+ struct hlsl_ir_node *xy;
+
+ if (!(xy = hlsl_new_binary_expr(ctx, op, arg1, e2->operands[0].node)))
+ return false;
+ list_add_before(&instr->entry, &xy->entry);
+
+ arg1 = xy;
+ arg2 = e2->operands[1].node;
+ progress = true;
+ }
+ }
+
+ if (progress)
+ {
+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2};
+ struct hlsl_ir_node *res;
+
+ if (!(res = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc)))
+ return false;
+ list_add_before(&instr->entry, &res->entry);
+ hlsl_replace_node(instr, res);
+ }
+
+ return progress;
+}
+
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_constant_value value;
@@ -1560,7 +1705,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
src = hlsl_ir_constant(swizzle->val.node);
for (i = 0; i < swizzle->node.data_type->dimx; ++i)
- value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)];
+ value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)];
if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc)))
return false;
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index b3442ec92ae..e6d90e14212 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -1582,7 +1582,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i
enum vkd3d_result ret;
unsigned int i, j;
- VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4);
if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL)
{
@@ -2340,7 +2340,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program
program->instructions = normaliser.instructions;
program->use_vocp = normaliser.use_vocp;
- program->normalisation_level = VSIR_FULLY_NORMALISED_IO;
+ program->normalisation_level = VSIR_NORMALISED_SM6;
return VKD3D_OK;
}
@@ -7210,6 +7210,7 @@ static const struct shader_signature *vsir_signature_from_register_type(struct v
enum vkd3d_shader_register_type register_type, bool *has_control_point, unsigned int *control_point_count)
{
*has_control_point = false;
+ *control_point_count = 0;
switch (register_type)
{
@@ -7233,7 +7234,7 @@ static const struct shader_signature *vsir_signature_from_register_type(struct v
{
case VKD3D_SHADER_TYPE_HULL:
if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE
- || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO)
+ || ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
{
*has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO;
*control_point_count = ctx->program->output_control_point_count;
@@ -7275,7 +7276,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru
signature = vsir_signature_from_register_type(ctx, reg->type, &has_control_point, &control_point_count);
VKD3D_ASSERT(signature);
- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO)
+ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6)
{
/* Indices are [register] or [control point, register]. Both are
* allowed to have a relative address. */
@@ -8097,29 +8098,20 @@ static void vsir_validate_signature_element(struct validation_context *ctx,
"element %u of %s signature: Non-contiguous mask %#x.",
idx, signature_type_name, element->mask);
- /* Here we'd likely want to validate that the usage mask is a subset of the
- * signature mask. Unfortunately the D3DBC parser sometimes violates this.
- * For example I've seen a shader like this:
- * ps_3_0
- * [...]
- * dcl_texcoord0 v0
- * [...]
- * texld r2.xyzw, v0.xyzw, s1.xyzw
- * [...]
- *
- * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to
- * compute the signature mask, but the texld instruction apparently uses all
- * the components. Of course the last two components are ignored, but
- * formally they seem to be used. So we end up with a signature element with
- * mask .xy and usage mask .xyzw.
- *
- * The correct fix would probably be to make the D3DBC parser aware of which
- * components are really used for each instruction, but that would take some
- * time. */
- if (element->used_mask & ~0xf)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
- "element %u of %s signature: Invalid usage mask %#x.",
- idx, signature_type_name, element->used_mask);
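+    /* From the SM4 normalisation level onwards the usage mask must be a
+     * subset of the signature mask; SM1-3 signatures may violate that, so
+     * there we only check that the mask fits in four components. */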
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM4)
+ {
+ if ((element->used_mask & element->mask) != element->used_mask)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid usage mask %#x with mask %#x.",
+ idx, signature_type_name, element->used_mask, element->mask);
+ }
+ else
+ {
+ if (element->used_mask & ~0xf)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+ "element %u of %s signature: Invalid usage mask %#x.",
+ idx, signature_type_name, element->used_mask);
+ }
switch (element->sysval_semantic)
{
@@ -8373,7 +8365,7 @@ static void vsir_validate_signature(struct validation_context *ctx, const struct
}
/* After I/O normalisation tessellation factors are merged in a single array. */
- if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO)
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
{
expected_outer_count = min(1, expected_outer_count);
expected_inner_count = min(1, expected_inner_count);
@@ -8567,7 +8559,7 @@ static void vsir_validate_dcl_index_range(struct validation_context *ctx,
const struct shader_signature *signature;
bool has_control_point;
- if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO)
+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
"DCL_INDEX_RANGE is not allowed with fully normalised input/output.");
diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
index 881e51527ff..bb85e62e94c 100644
--- a/libs/vkd3d/libs/vkd3d-shader/msl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
@@ -1314,7 +1314,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0)
return ret;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0)
return ret;
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index 7837b1fc8e4..a7b935543a0 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -10826,7 +10826,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct
compile_info, compiler->message_context)) < 0)
return result;
- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO);
+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count);
if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info))))
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index 0dbcd2f6f07..872603052ac 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -2793,7 +2793,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro
/* Estimate instruction count to avoid reallocation in most shaders. */
if (!vsir_program_init(program, compile_info,
- &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED))
+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
return false;
vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name);
sm4->ptr = sm4->start;
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index 88604539fae..3bfb0a7c3cd 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -1411,9 +1411,10 @@ enum vsir_control_flow_type
enum vsir_normalisation_level
{
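+    /* Levels are compared with ordered comparisons; VSIR_NORMALISED_SM6 is
+     * the fully normalised form previously named VSIR_FULLY_NORMALISED_IO. */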
- VSIR_NOT_NORMALISED,
+ VSIR_NORMALISED_SM1,
+ VSIR_NORMALISED_SM4,
VSIR_NORMALISED_HULL_CONTROL_POINT_IO,
- VSIR_FULLY_NORMALISED_IO,
+ VSIR_NORMALISED_SM6,
};
struct vsir_program
--
2.45.2