Files
wine-staging/patches/vkd3d-latest/0002-Updated-vkd3d-to-74ebd4621f3177f45ce22512019ee968ec4.patch
Alistair Leslie-Hughes e97aabbb86 Updated vkd3d-latest patchset
2025-11-15 16:50:06 +11:00

896 lines
37 KiB
Diff

From 82311f84ad8344655c3649f05d002092fb88df6b Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Wed, 5 Nov 2025 10:32:36 +1100
Subject: [PATCH] Updated vkd3d to 74ebd4621f3177f45ce22512019ee968ec400f4d.
---
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 22 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 17 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 26 +-
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 370 +++++++++++++++++-
libs/vkd3d/libs/vkd3d-shader/msl.c | 164 +++++++-
.../libs/vkd3d-shader/vkd3d_shader_private.h | 1 +
6 files changed, 573 insertions(+), 27 deletions(-)
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 1de17e2b551..dc9886b5384 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -1976,14 +1976,15 @@ static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx)
return hlsl_new_expr(ctx, HLSL_OP0_ERROR, operands, ctx->builtin_types.error, &loc);
}
-struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
- struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
+struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block,
+ struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_if *iff;
if (!(iff = hlsl_alloc(ctx, sizeof(*iff))))
return NULL;
init_node(&iff->node, HLSL_IR_IF, NULL, loc);
+ iff->flatten_type = flatten_type;
hlsl_src_from_node(&iff->condition, condition);
hlsl_block_init(&iff->then_block);
hlsl_block_add_block(&iff->then_block, then_block);
@@ -1993,10 +1994,11 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
return &iff->node;
}
-void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition,
- struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
+void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block,
+ enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc)
{
- struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, loc);
+ struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, flatten_type, loc);
if (instr)
{
@@ -2674,7 +2676,8 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma
return NULL;
}
- if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc)))
+ if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node),
+ &then_block, &else_block, src->flatten_type, &src->node.loc)))
{
hlsl_block_cleanup(&then_block);
hlsl_block_cleanup(&else_block);
@@ -3849,6 +3852,13 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i
vkd3d_string_buffer_printf(buffer, "unresolved_continue");
break;
}
+
+ if (jump->condition.node)
+ {
+ vkd3d_string_buffer_printf(buffer, " (");
+ dump_src(buffer, &jump->condition);
+ vkd3d_string_buffer_printf(buffer, ")");
+ }
}
static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop)
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index 75027f83e33..87cfaf83f76 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -661,12 +661,20 @@ struct hlsl_ir_call
struct hlsl_ir_function_decl *decl;
};
+enum hlsl_if_flatten_type /* Flattening policy carried by each hlsl_ir_if node (its flatten_type field). */
+{
+ HLSL_IF_FLATTEN_DEFAULT, /* No attribute given; the compiler decides. */
+ HLSL_IF_FORCE_FLATTEN, /* Requested with the "flatten" attribute. */
+ HLSL_IF_FORCE_BRANCH /* Requested with the "branch" attribute. */
+};
+
struct hlsl_ir_if
{
struct hlsl_ir_node node;
struct hlsl_src condition;
struct hlsl_block then_block;
struct hlsl_block else_block;
+ enum hlsl_if_flatten_type flatten_type;
};
enum hlsl_loop_unroll_type
@@ -1586,8 +1594,9 @@ struct hlsl_ir_node *hlsl_block_add_expr(struct hlsl_ctx *ctx, struct hlsl_block
struct hlsl_type *data_type, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_float_constant(struct hlsl_ctx *ctx, struct hlsl_block *block,
float f, const struct vkd3d_shader_location *loc);
-void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition,
- struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc);
+void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block,
+ enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_index(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_int_constant(struct hlsl_ctx *ctx, struct hlsl_block *block,
@@ -1704,8 +1713,8 @@ struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *no
struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx,
struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters,
const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc);
-struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
- struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block,
+ struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc);
struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx,
enum hlsl_so_object_type so_type, struct hlsl_type *type);
struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index 8ec963c8656..92bfd7040fc 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -471,7 +471,7 @@ static void append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co
hlsl_block_init(&then_block);
hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &condition->loc);
- hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, &condition->loc);
+ hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &condition->loc);
}
static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs)
@@ -9139,6 +9139,7 @@ selection_statement:
{
struct hlsl_ir_node *condition = node_from_block($4);
const struct parse_attribute_list *attributes = &$1;
+ enum hlsl_if_flatten_type flatten_type = HLSL_IF_FLATTEN_DEFAULT;
unsigned int i;
check_attribute_list_for_duplicates(ctx, attributes);
@@ -9147,10 +9148,19 @@ selection_statement:
{
const struct hlsl_attribute *attr = attributes->attrs[i];
- if (!strcmp(attr->name, "branch")
- || !strcmp(attr->name, "flatten"))
+ if (!strcmp(attr->name, "branch"))
{
- hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name);
+ if (flatten_type == HLSL_IF_FORCE_FLATTEN)
+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
+ "The 'branch' and 'flatten' attributes are mutually exclusive.");
+ flatten_type = HLSL_IF_FORCE_BRANCH;
+ }
+ else if (!strcmp(attr->name, "flatten"))
+ {
+ if (flatten_type == HLSL_IF_FORCE_BRANCH)
+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
+ "The 'branch' and 'flatten' attributes are mutually exclusive.");
+ flatten_type = HLSL_IF_FORCE_FLATTEN;
}
else
{
@@ -9158,10 +9168,16 @@ selection_statement:
}
}
+ if (flatten_type == HLSL_IF_FORCE_BRANCH && hlsl_version_lt(ctx, 2, 1))
+ {
+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+ "The 'branch' attribute requires shader model 2.1 or higher.");
+ }
+
check_condition_type(ctx, condition);
condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4);
- hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, &@2);
+ hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, flatten_type, &@2);
destroy_block($6.then_block);
destroy_block($6.else_block);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 9048214923b..6fe29e5bfed 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -1060,7 +1060,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx,
hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc);
- if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc)))
+ if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc)))
return;
list_add_after(&load->node.entry, &iff->entry);
}
@@ -1245,7 +1245,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun
load = hlsl_block_add_simple_load(ctx, block, func->early_return_var, &cf_instr->loc);
not = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_LOGIC_NOT, load, &cf_instr->loc);
- hlsl_block_add_if(ctx, block, not, &then_block, NULL, &cf_instr->loc);
+ hlsl_block_add_if(ctx, block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc);
}
return has_early_return;
@@ -3916,6 +3916,320 @@ static bool remove_trivial_conditional_branches(struct hlsl_ctx *ctx, struct hls
return true;
}
+static bool is_conditional_block_simple(const struct hlsl_block *cond_block) /* Heuristic: is this branch body cheap enough to flatten when no attribute forces a choice? */
+{
+ static const unsigned int max_cost = 10; /* Budget; a block whose accumulated cost exceeds it is not simple. */
+ struct hlsl_ir_node *instr;
+ unsigned int cost = 0;
+
+ LIST_FOR_EACH_ENTRY(instr, &cond_block->instrs, struct hlsl_ir_node, entry)
+ {
+ switch (instr->type)
+ {
+ case HLSL_IR_CONSTANT:
+ case HLSL_IR_STRING_CONSTANT:
+ case HLSL_IR_SWIZZLE:
+ break; /* Accepted without adding to the cost. */
+
+ case HLSL_IR_EXPR:
+ ++cost; /* Each expression costs one unit. */
+ break;
+
+ case HLSL_IR_JUMP:
+ {
+ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
+
+ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ && jump->type != HLSL_IR_JUMP_DISCARD_NEG)
+ return false; /* Only the two discard jump types are accepted. */
+ ++cost;
+ break;
+ }
+
+ case HLSL_IR_STORE:
+ if (hlsl_ir_store(instr)->lhs.var->is_tgsm)
+ return false; /* Stores to variables flagged is_tgsm disqualify the block. */
+ ++cost;
+ break;
+
+ case HLSL_IR_LOAD:
+ if (hlsl_ir_load(instr)->src.var->is_tgsm)
+ return false; /* Loads are free unless they read an is_tgsm variable. */
+ break;
+
+ default:
+ return false; /* Every other instruction type, e.g. a nested HLSL_IR_IF, is not simple. */
+ }
+
+ if (cost > max_cost) /* Checked after each instruction, so the scan stops as soon as the budget is exceeded. */
+ return false;
+ }
+
+ return true;
+}
+
+static bool can_flatten_conditional_block(struct hlsl_ctx *ctx, const struct hlsl_block *cond_block) /* True if nothing in the block, including nested control flow, prevents flattening; otherwise reports the reason through ctx. */
+{
+ struct hlsl_ir_node *instr;
+
+ LIST_FOR_EACH_ENTRY(instr, &cond_block->instrs, struct hlsl_ir_node, entry)
+ {
+ switch (instr->type)
+ {
+ case HLSL_IR_CALL:
+ case HLSL_IR_RESOURCE_STORE:
+ case HLSL_IR_INTERLOCKED:
+ case HLSL_IR_SYNC:
+ goto fail; /* Reported as a "side effects" error at the fail label. */
+
+ case HLSL_IR_JUMP:
+ {
+ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
+
+ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ && jump->type != HLSL_IR_JUMP_DISCARD_NEG)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Flattening conditional blocks with non-discard jump instructions."); /* Reported with hlsl_fixme() rather than hlsl_error(), unlike the fail path. */
+ return false;
+ }
+ break;
+ }
+
+ case HLSL_IR_STORE:
+ if (hlsl_ir_store(instr)->lhs.var->is_tgsm)
+ goto fail; /* Stores to is_tgsm variables take the same error path as the types above. */
+ break;
+
+ case HLSL_IR_IF:
+ {
+ struct hlsl_ir_if *iff = hlsl_ir_if(instr);
+
+ if (!can_flatten_conditional_block(ctx, &iff->then_block) /* Recurse into both branches of a nested "if". */
+ || !can_flatten_conditional_block(ctx, &iff->else_block))
+ return false; /* The recursive call has already reported the reason. */
+ break;
+ }
+
+ case HLSL_IR_LOOP:
+ {
+ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
+
+ if (!can_flatten_conditional_block(ctx, &loop->iter) /* Recurse into the loop's iter and body blocks. */
+ || !can_flatten_conditional_block(ctx, &loop->body))
+ return false;
+ break;
+ }
+
+ case HLSL_IR_SWITCH:
+ {
+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
+ struct hlsl_ir_switch_case *c;
+
+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) /* Recurse into every case body. */
+ {
+ if (!can_flatten_conditional_block(ctx, &c->body))
+ return false;
+ }
+ break;
+ }
+
+ case HLSL_IR_CONSTANT:
+ case HLSL_IR_EXPR:
+ case HLSL_IR_INDEX:
+ case HLSL_IR_LOAD:
+ case HLSL_IR_RESOURCE_LOAD:
+ case HLSL_IR_STRING_CONSTANT:
+ case HLSL_IR_SWIZZLE:
+ case HLSL_IR_COMPILE:
+ case HLSL_IR_SAMPLER_STATE:
+ case HLSL_IR_STATEBLOCK_CONSTANT:
+ break; /* These instruction types never prevent flattening. */
+ }
+ }
+
+ return true;
+
+fail: /* Reached only by goto from inside the loop, so instr is the offending instruction. */
+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_CANNOT_FLATTEN,
+ "Conditional branches with side effects cannot be flattened.");
+ return false;
+}
+
+static bool lower_conditional_block_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, /* Rewrites a store inside a branch being flattened so that its new value depends on the branch condition. */
+ struct hlsl_ir_node *cond, bool is_then) /* cond: the "if" condition; is_then: true when instr comes from the then-block. Returns true if instr was rewritten. */
+{
+ struct hlsl_ir_node *load, *new_val;
+ struct hlsl_ir_store *store;
+ struct hlsl_type *rhs_type;
+ struct hlsl_block block;
+
+ if (instr->type != HLSL_IR_STORE)
+ return false; /* Not a store; nothing to do here. */
+ store = hlsl_ir_store(instr);
+ rhs_type = store->rhs.node->data_type;
+
+ VKD3D_ASSERT(rhs_type->class <= HLSL_CLASS_VECTOR); /* The stored value is expected to be at most vector class. */
+ VKD3D_ASSERT(cond->data_type->e.numeric.dimx == 1); /* The condition is expected to be one component wide. */
+
+ hlsl_block_init(&block);
+
+ load = hlsl_block_add_load_index(ctx, &block, &store->lhs, NULL, &store->node.loc); /* Load from the store's own destination; presumably yields its current value (helper not visible here). */
+
+ if (store->writemask && !hlsl_types_are_equal(rhs_type, load->data_type))
+ load = hlsl_block_add_swizzle(ctx, &block, hlsl_swizzle_from_writemask(store->writemask), /* Swizzle the loaded value by the writemask down to the rhs component count. */
+ rhs_type->e.numeric.dimx, load, &store->node.loc);
+
+ if (rhs_type->e.numeric.dimx != 1)
+ cond = hlsl_block_add_swizzle(ctx, &block, HLSL_SWIZZLE(X, X, X, X), /* Replicate the condition's X component to the rhs width. */
+ rhs_type->e.numeric.dimx, cond, &store->node.loc);
+
+ if (is_then) /* NOTE(review): assumes hlsl_add_conditional(cond, a, b) selects a when cond is true, else b. */
+ new_val = hlsl_add_conditional(ctx, &block, cond, store->rhs.node, load); /* then-block: new value if cond, old value otherwise. */
+ else
+ new_val = hlsl_add_conditional(ctx, &block, cond, load, store->rhs.node); /* else-block: operands swapped. */
+
+ list_move_before(&store->node.entry, &block.instrs); /* Move the generated instructions in front of the store. */
+ hlsl_src_remove(&store->rhs);
+ hlsl_src_from_node(&store->rhs, new_val); /* The store now writes the selected value. */
+ return true;
+}
+
+static bool lower_conditional_block_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, /* Folds the branch condition into the condition of a discard_nz jump inside a branch being flattened. */
+ struct hlsl_ir_node *cond, bool is_then) /* cond: the "if" condition; is_then: true when instr comes from the then-block. Returns true if instr was rewritten. */
+{
+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
+ struct hlsl_ir_node *discard_cond, *new_cond = NULL;
+ struct hlsl_ir_jump *jump;
+ struct hlsl_block block;
+
+ if (instr->type != HLSL_IR_JUMP)
+ return false;
+ jump = hlsl_ir_jump(instr);
+ discard_cond = jump->condition.node;
+
+ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ)
+ return false; /* Other jump types are not handled by this helper. */
+
+ VKD3D_ASSERT(ctx->profile->major_version >= 4); /* discard_nz jumps are only expected for major version 4 and up. */
+ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL && cond->data_type->e.numeric.dimx == 1);
+ VKD3D_ASSERT(discard_cond->data_type->e.numeric.dimx == 1);
+
+ hlsl_block_init(&block);
+
+ if (!is_then)
+ cond = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_LOGIC_NOT, cond, &instr->loc); /* For the else-block the negated condition is used. */
+ discard_cond = hlsl_block_add_cast(ctx, &block, discard_cond, cond->data_type, &instr->loc); /* Cast the discard condition to the type of cond. */
+
+ operands[0] = cond;
+ operands[1] = discard_cond;
+
+ /* discard_nz (cond && discard_cond) */
+ new_cond = hlsl_block_add_expr(ctx, &block, HLSL_OP2_LOGIC_AND, operands,
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &jump->node.loc); /* The combined condition is typed as scalar uint. */
+
+ list_move_before(&jump->node.entry, &block.instrs); /* Move the generated instructions in front of the jump. */
+ hlsl_src_remove(&jump->condition);
+ hlsl_src_from_node(&jump->condition, new_cond);
+ return true;
+}
+
+static bool lower_conditional_block_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, /* Folds the branch condition into the condition of a discard_neg jump inside a branch being flattened. */
+ struct hlsl_ir_node *cond, bool is_then) /* cond: the "if" condition; is_then: true when instr comes from the then-block. Returns true if instr was rewritten. */
+{
+ struct hlsl_ir_node *discard_cond, *new_cond = NULL;
+ struct hlsl_constant_value zero_value = {0};
+ struct hlsl_ir_node *zero;
+ struct hlsl_ir_jump *jump;
+ struct hlsl_block block;
+
+ if (instr->type != HLSL_IR_JUMP)
+ return false;
+ jump = hlsl_ir_jump(instr);
+ discard_cond = jump->condition.node;
+
+ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG)
+ return false; /* Other jump types are not handled by this helper. */
+
+ VKD3D_ASSERT(ctx->profile->major_version < 4); /* discard_neg jumps are only expected below major version 4. */
+ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL && cond->data_type->e.numeric.dimx == 1);
+
+ hlsl_block_init(&block);
+
+ if (!(zero = hlsl_new_constant(ctx, discard_cond->data_type, &zero_value, &instr->loc))) /* Zero constant with the discard condition's type. */
+ return false; /* Constant creation failed; the jump is left unchanged. */
+ hlsl_block_add_instr(&block, zero);
+
+ if (zero->data_type->e.numeric.dimx != 1)
+ cond = hlsl_block_add_swizzle(ctx, &block, HLSL_SWIZZLE(X, X, X, X), /* Replicate the condition's X component to the discard condition's width. */
+ zero->data_type->e.numeric.dimx, cond, &instr->loc);
+
+ if (is_then) /* NOTE(review): assumes hlsl_add_conditional(cond, a, b) selects a when cond is true, else b, and that zero does not trigger discard_neg. */
+ new_cond = hlsl_add_conditional(ctx, &block, cond, discard_cond, zero); /* then-block: original condition if cond, zero otherwise. */
+ else
+ new_cond = hlsl_add_conditional(ctx, &block, cond, zero, discard_cond); /* else-block: operands swapped. */
+
+ list_move_before(&jump->node.entry, &block.instrs); /* Move the generated instructions in front of the jump. */
+ hlsl_src_remove(&jump->condition);
+ hlsl_src_from_node(&jump->condition, new_cond);
+ return true;
+}
+
+struct flatten_conditional_block_ctx /* Context handed to lower_conditional_block_instrs() through hlsl_transform_ir(). */
+{
+ struct hlsl_ir_node *cond; /* Condition of the "if" being flattened. */
+ bool is_then; /* True while the then-block is processed, false for the else-block. */
+};
+
+static bool lower_conditional_block_instrs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) /* hlsl_transform_ir() callback; context is a struct flatten_conditional_block_ctx. */
+{
+ struct flatten_conditional_block_ctx *flatten_ctx = context;
+
+ return lower_conditional_block_stores(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then) /* Helpers are tried in order; || stops at the first one that rewrites instr. */
+ || lower_conditional_block_discard_nz(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then)
+ || lower_conditional_block_discard_neg(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then);
+}
+
+static bool flatten_conditional_branches(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) /* hlsl_transform_ir() callback: replaces an "if" by the conditionalised contents of both branches. Returns true if the "if" was removed. */
+{
+ struct flatten_conditional_block_ctx flatten_ctx;
+ struct hlsl_ir_if *iff;
+ bool force_flatten;
+
+ if (instr->type != HLSL_IR_IF)
+ return false;
+ iff = hlsl_ir_if(instr);
+
+ if (iff->flatten_type == HLSL_IF_FORCE_BRANCH)
+ return false; /* A forced branch is never flattened. */
+
+ force_flatten = iff->flatten_type == HLSL_IF_FORCE_FLATTEN
+ || hlsl_version_lt(ctx, 2, 1); /* Always flatten branches for SM < 2.1. */
+
+ if (force_flatten)
+ {
+ if (!can_flatten_conditional_block(ctx, &iff->then_block)
+ || !can_flatten_conditional_block(ctx, &iff->else_block))
+ return false; /* can_flatten_conditional_block() has already reported why. */
+ }
+ else if (!is_conditional_block_simple(&iff->then_block) || !is_conditional_block_simple(&iff->else_block))
+ {
+ /* Only flatten simple blocks by default. */
+ return false;
+ }
+
+ flatten_ctx.cond = iff->condition.node;
+
+ flatten_ctx.is_then = true; /* First pass: conditionalise the then-block. */
+ hlsl_transform_ir(ctx, lower_conditional_block_instrs, &iff->then_block, &flatten_ctx);
+
+ flatten_ctx.is_then = false; /* Second pass: conditionalise the else-block. */
+ hlsl_transform_ir(ctx, lower_conditional_block_instrs, &iff->else_block, &flatten_ctx);
+
+ list_move_before(&instr->entry, &iff->then_block.instrs); /* Move the then-instructions, then the else-instructions, in front of the "if". */
+ list_move_before(&instr->entry, &iff->else_block.instrs);
+ list_remove(&instr->entry); /* Unlink and free the "if" itself. */
+ hlsl_free_instr(instr);
+ return true;
+}
+
static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_ir_switch_case *c, *def = NULL;
@@ -5462,6 +5776,35 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
return true;
}
+static bool cast_discard_neg_conditions_to_vec4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) /* hlsl_transform_ir() callback: widens the condition of a discard_neg jump to 4 components. Returns true if instr was changed. */
+{
+ struct hlsl_ir_node *swizzle;
+ struct hlsl_ir_jump *jump;
+ struct hlsl_block block;
+ unsigned int dimx;
+
+ if (instr->type != HLSL_IR_JUMP)
+ return false;
+ jump = hlsl_ir_jump(instr);
+ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG)
+ return false;
+
+ dimx = jump->condition.node->data_type->e.numeric.dimx;
+ if (dimx == 4)
+ return false; /* Already 4 components wide. */
+
+ hlsl_block_init(&block);
+
+ swizzle = hlsl_block_add_swizzle(ctx, &block, hlsl_swizzle_from_writemask((1 << dimx) - 1), 4, /* 4-component swizzle built from a writemask covering the dimx existing components. */
+ jump->condition.node, &instr->loc);
+
+ list_move_before(&instr->entry, &block.instrs); /* Move the swizzle in front of the jump. */
+ hlsl_src_remove(&jump->condition);
+ hlsl_src_from_node(&jump->condition, swizzle);
+
+ return true;
+}
+
static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
switch (instr->type)
@@ -8465,6 +8808,7 @@ static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *bod
progress |= replace_ir(ctx, fold_swizzle_chains, body);
progress |= replace_ir(ctx, fold_trivial_swizzles, body);
progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL);
+ progress |= hlsl_transform_ir(ctx, flatten_conditional_branches, body, NULL);
} while (progress);
replace_ir(ctx, fold_redundant_casts, body);
}
@@ -9996,6 +10340,8 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
if (jump->type == HLSL_IR_JUMP_DISCARD_NEG)
{
+ VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 4);
+
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_TEXKILL, 0, 1)))
return;
@@ -10016,11 +10362,9 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program
struct hlsl_ir_node *instr = &iff->node;
struct vkd3d_shader_instruction *ins;
- if (hlsl_version_lt(ctx, 2, 1))
- {
- hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches.");
- return;
- }
+ /* Conditional branches should have already been flattened for SM < 2.1. */
+ VKD3D_ASSERT(hlsl_version_ge(ctx, 2, 1));
+
VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1);
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_IFC, 0, 2)))
@@ -13835,7 +14179,7 @@ static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx
load = hlsl_block_add_simple_load(ctx, dst, var, loc);
cond = hlsl_block_add_unary_expr(ctx, dst, HLSL_OP1_LOGIC_NOT, load, loc);
- if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
+ if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, loc)))
return NULL;
hlsl_block_add_instr(dst, iff);
@@ -14815,6 +15159,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
{
while (replace_ir(ctx, lower_nonconstant_array_loads, body));
+ hlsl_transform_ir(ctx, cast_discard_neg_conditions_to_vec4, body, NULL);
+
replace_ir(ctx, lower_ternary, body);
replace_ir(ctx, lower_int_modulus_sm1, body);
replace_ir(ctx, lower_division, body);
@@ -14847,6 +15193,14 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
hlsl_run_folding_passes(ctx, body);
+ if (profile->major_version < 4)
+ {
+ /* Ternary operations can be potentially introduced by hlsl_run_folding_passes(). */
+ replace_ir(ctx, lower_ternary, body);
+ if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL)
+ replace_ir(ctx, lower_cmp, body);
+ }
+
do
compute_liveness(ctx, body);
while (hlsl_transform_ir(ctx, dce, body, NULL));
diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
index c974c9e532b..d34133d6d4c 100644
--- a/libs/vkd3d/libs/vkd3d-shader/msl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
@@ -226,14 +226,22 @@ static bool msl_get_binding(const struct msl_generator *gen, const struct vkd3d_
continue;
if (binding->register_space != descriptor->register_space)
continue;
- if (binding->register_index != register_idx)
+ if (binding->register_index > descriptor->register_index)
+ continue;
+ if (descriptor->count != ~0u && binding->binding.count < descriptor->count)
+ continue;
+ if (descriptor->count != ~0u
+ && binding->binding.count - descriptor->count < descriptor->register_index - binding->register_index)
+ continue;
+ if (descriptor->count == ~0u
+ && binding->binding.count <= descriptor->register_index - binding->register_index)
continue;
if (!msl_check_shader_visibility(gen, binding->shader_visibility))
continue;
if ((binding->flags & flags) != flags)
continue;
- *idx = binding->binding.binding;
+ *idx = register_idx + binding->binding.binding - binding->register_index;
return true;
}
@@ -416,6 +424,11 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu
msl_print_subscript(buffer, gen, reg->idx[2].rel_addr, reg->idx[2].offset);
return MSL_DATA_UNION;
+ case VKD3DSPR_IMMCONSTBUFFER:
+ vkd3d_string_buffer_printf(buffer, "icb%u", reg->idx[0].offset);
+ msl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset);
+ return MSL_DATA_UINT;
+
case VKD3DSPR_IDXTEMP:
vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset);
msl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset);
@@ -445,6 +458,24 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu
vkd3d_string_buffer_printf(buffer, "v_local_thread_index");
return MSL_DATA_UNION;
+ case VKD3DSPR_UNDEF:
+ switch (reg->dimension)
+ {
+ case VSIR_DIMENSION_SCALAR:
+ vkd3d_string_buffer_printf(buffer, "0u");
+ return MSL_DATA_UINT;
+
+ case VSIR_DIMENSION_VEC4:
+ vkd3d_string_buffer_printf(buffer, "uint4(0u, 0u, 0u, 0u)");
+ return MSL_DATA_UINT;
+
+ default:
+ vkd3d_string_buffer_printf(buffer, "<unhandled_dimension %#x>", reg->dimension);
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled dimension %#x.", reg->dimension);
+ return MSL_DATA_UINT;
+ }
+
default:
msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
"Internal compiler error: Unhandled register type %#x.", reg->type);
@@ -667,6 +698,28 @@ static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruc
msl_dst_cleanup(&dst, &gen->string_buffers);
}
+static void msl_firstbit(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits an assignment for the FIRSTBIT_LO / FIRSTBIT_HI opcodes routed here by msl_handle_instruction(). */
+{
+ const char *op = ins->opcode == VSIR_OP_FIRSTBIT_LO ? "ctz" : "clz"; /* ctz for FIRSTBIT_LO, clz for any other opcode. */
+ unsigned int mask_size;
+ struct msl_src src;
+ struct msl_dst dst;
+ uint32_t mask;
+
+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]);
+ msl_src_init(&src, gen, &ins->src[0], mask); /* The source is initialised with the mask returned for the destination. */
+
+ if ((mask_size = vsir_write_mask_component_count(mask)) > 1)
+ msl_print_assignment(gen, &dst, "select(uint%u(0xffffffffu), %s(%s), bool%u(%s))", /* Vector form; presumably the per-component equivalent of the scalar form below. */
+ mask_size, op, src.str->buffer, mask_size, src.str->buffer);
+ else
+ msl_print_assignment(gen, &dst, "%s ? %s(%s) : 0xffffffffu", /* Scalar form: a zero source yields 0xffffffff instead of the op result. */
+ src.str->buffer, op, src.str->buffer);
+
+ msl_src_cleanup(&src, &gen->string_buffers);
+ msl_dst_cleanup(&dst, &gen->string_buffers);
+}
+
static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op)
{
struct vkd3d_string_buffer *args;
@@ -1336,6 +1389,46 @@ static void msl_dcl_indexable_temp(struct msl_generator *gen, const struct vkd3d
ins->declaration.indexable_temp.register_size);
}
+static void msl_barrier(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) /* Emits fence and barrier statements for a VSIR_OP_SYNC instruction according to ins->flags. */
+{
+ uint32_t flags = ins->flags; /* Local copy; handled bits are cleared so that leftovers can be reported at the end. */
+
+ if (flags & (VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP_UAV))
+ {
+ const char *scope = flags & VKD3DSSF_GLOBAL_UAV ? "thread_scope_device" : "thread_scope_threadgroup"; /* Device scope if the global UAV flag is set, threadgroup scope otherwise. */
+ const char *mem_flags = "mem_flags::mem_device | mem_flags::mem_texture";
+
+ if (flags & VKD3DSSF_GROUP_SHARED_MEMORY)
+ {
+ mem_flags = "mem_flags::mem_device | mem_flags::mem_texture | mem_flags::mem_threadgroup"; /* Fold the group-shared fence into the same statement. */
+ flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY;
+ }
+
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer,
+ "atomic_thread_fence(%s, memory_order_seq_cst, %s);\n", mem_flags, scope);
+ flags &= ~(VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP_UAV);
+ }
+ else if (flags & VKD3DSSF_GROUP_SHARED_MEMORY) /* Group-shared memory only, no UAV flag. */
+ {
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer,
+ "atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);\n");
+ flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY;
+ }
+
+ if (flags & VKD3DSSF_THREAD_GROUP)
+ {
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "threadgroup_barrier(mem_flags::mem_none);\n"); /* Barrier without memory flags; any fences were emitted separately above. */
+ flags &= ~VKD3DSSF_THREAD_GROUP;
+ }
+
+ if (flags) /* Any bit still set was not handled above. */
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Unhandled synchronisation flags %#x.", flags);
+}
+
static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins)
{
gen->location = ins->location;
@@ -1373,6 +1466,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d
case VSIR_OP_COS:
msl_intrinsic(gen, ins, "cos");
break;
+ case VSIR_OP_COUNTBITS:
+ msl_intrinsic(gen, ins, "popcount");
+ break;
case VSIR_OP_DCL_INDEXABLE_TEMP:
msl_dcl_indexable_temp(gen, ins);
break;
@@ -1425,6 +1521,10 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d
case VSIR_OP_EXP:
msl_intrinsic(gen, ins, "exp2");
break;
+ case VSIR_OP_FIRSTBIT_HI:
+ case VSIR_OP_FIRSTBIT_LO:
+ msl_firstbit(gen, ins);
+ break;
case VSIR_OP_FRC:
msl_intrinsic(gen, ins, "fract");
break;
@@ -1570,6 +1670,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d
case VSIR_OP_SWITCH:
msl_switch(gen, ins);
break;
+ case VSIR_OP_SYNC:
+ msl_barrier(gen, ins);
+ break;
case VSIR_OP_TAN:
msl_intrinsic(gen, ins, "tan");
break;
@@ -1857,6 +1960,59 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen)
vkd3d_string_buffer_printf(buffer, "};\n\n");
}
+static void msl_generate_immediate_constant_buffers(struct msl_generator *gen) /* Emits one "constant" array declaration, named icb<register_idx>, for each immediate constant buffer of the program. */
+{
+ const struct vkd3d_shader_immediate_constant_buffer *icb;
+ size_t i, j;
+
+ for (i = 0; i < gen->program->icb_count; ++i)
+ {
+ icb = gen->program->icbs[i];
+
+ if (data_type_is_64_bit(icb->data_type)) /* 64-bit data is reported as an error; the declaration is still emitted, with an empty initialiser (below). */
+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL,
+ "Internal compiler error: Immediate constant buffer %zu has unhandled data type \"%s\" (%#x).",
+ i, vsir_data_type_get_name(icb->data_type, "<unknown>"), icb->data_type);
+
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "constant %s icb%u[%u] =",
+ icb->component_count == 4 ? "uint4" : "uint", icb->register_idx, icb->element_count); /* Element type is uint4 for 4-component buffers, uint otherwise. */
+
+ if (icb->is_null || data_type_is_64_bit(icb->data_type))
+ {
+ vkd3d_string_buffer_printf(gen->buffer, " {};\n\n"); /* No data is printed for null or 64-bit buffers. */
+ continue;
+ }
+
+ vkd3d_string_buffer_printf(gen->buffer, "\n");
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "{\n");
+
+ if (icb->component_count == 4)
+ {
+ for (j = 0; j < icb->element_count; ++j) /* One braced group of four values per element, one element per line. */
+ {
+ msl_print_indent(gen->buffer, gen->indent + 1);
+ vkd3d_string_buffer_printf(gen->buffer, "{0x%08x, 0x%08x, 0x%08x, 0x%08x},\n",
+ icb->data[4 * j + 0], icb->data[4 * j + 1], icb->data[4 * j + 2], icb->data[4 * j + 3]);
+ }
+ }
+ else
+ {
+ for (j = 0; j < icb->element_count; ++j) /* One value per element, four values per output line. */
+ {
+ if (!(j & 3)) /* Indent only at the start of each row of four. */
+ msl_print_indent(gen->buffer, gen->indent + 1);
+ vkd3d_string_buffer_printf(gen->buffer, "0x%08x,%s", icb->data[j],
+ j == icb->element_count - 1 || (j & 3) == 3 ? "\n" : " "); /* Newline after every fourth value and after the last one, a space otherwise. */
+ }
+ }
+
+ msl_print_indent(gen->buffer, gen->indent);
+ vkd3d_string_buffer_printf(gen->buffer, "};\n\n");
+ }
+}
+
static void msl_generate_entrypoint_prologue(struct msl_generator *gen)
{
const struct shader_signature *signature = &gen->program->input_signature;
@@ -2102,8 +2258,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader
MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n");
vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL));
- vkd3d_string_buffer_printf(gen->buffer, "#include <metal_common>\n");
- vkd3d_string_buffer_printf(gen->buffer, "#include <metal_texture>\n\n");
+ vkd3d_string_buffer_printf(gen->buffer, "#include <metal_stdlib>\n");
vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n");
if (gen->program->global_flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL))
@@ -2144,6 +2299,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader
msl_generate_input_struct_declarations(gen);
msl_generate_output_struct_declarations(gen);
+ msl_generate_immediate_constant_buffers(gen);
vkd3d_string_buffer_printf(gen->buffer,
"static void %s_main(thread vkd3d_vec4 *v, "
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index 404e7cec6ea..763a4906919 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -175,6 +175,7 @@ enum vkd3d_shader_error
VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE = 5043,
VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT = 5044,
VKD3D_SHADER_ERROR_HLSL_MISSING_INPUT_PATCH = 5045,
+ VKD3D_SHADER_ERROR_HLSL_CANNOT_FLATTEN = 5046,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
--
2.51.0