From 82311f84ad8344655c3649f05d002092fb88df6b Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 5 Nov 2025 10:32:36 +1100 Subject: [PATCH] Updated vkd3d to 74ebd4621f3177f45ce22512019ee968ec400f4d. --- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 22 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 17 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 26 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 370 +++++++++++++++++- libs/vkd3d/libs/vkd3d-shader/msl.c | 164 +++++++- .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 6 files changed, 573 insertions(+), 27 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 1de17e2b551..dc9886b5384 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -1976,14 +1976,15 @@ static struct hlsl_ir_node *hlsl_new_error_expr(struct hlsl_ctx *ctx) return hlsl_new_expr(ctx, HLSL_OP0_ERROR, operands, ctx->builtin_types.error, &loc); } -struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, + struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc) { struct hlsl_ir_if *iff; if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) return NULL; init_node(&iff->node, HLSL_IR_IF, NULL, loc); + iff->flatten_type = flatten_type; hlsl_src_from_node(&iff->condition, condition); hlsl_block_init(&iff->then_block); hlsl_block_add_block(&iff->then_block, then_block); @@ -1993,10 +1994,11 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond return &iff->node; } -void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) +void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, + enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, loc); + struct hlsl_ir_node *instr = hlsl_new_if(ctx, condition, then_block, else_block, flatten_type, loc); if (instr) { @@ -2674,7 +2676,8 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma return NULL; } - if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), + &then_block, &else_block, src->flatten_type, &src->node.loc))) { hlsl_block_cleanup(&then_block); hlsl_block_cleanup(&else_block); @@ -3849,6 +3852,13 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "unresolved_continue"); break; } + + if (jump->condition.node) + { + vkd3d_string_buffer_printf(buffer, " ("); + dump_src(buffer, &jump->condition); + vkd3d_string_buffer_printf(buffer, ")"); + } } static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 75027f83e33..87cfaf83f76 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -661,12 +661,20 @@ struct hlsl_ir_call struct hlsl_ir_function_decl *decl; }; +enum hlsl_if_flatten_type +{ + HLSL_IF_FLATTEN_DEFAULT, + HLSL_IF_FORCE_FLATTEN, + HLSL_IF_FORCE_BRANCH +}; + struct hlsl_ir_if { struct hlsl_ir_node node; struct hlsl_src condition; struct hlsl_block then_block; struct hlsl_block else_block; + enum hlsl_if_flatten_type flatten_type; }; enum hlsl_loop_unroll_type @@ -1586,8 +1594,9 @@ struct hlsl_ir_node *hlsl_block_add_expr(struct hlsl_ctx *ctx, struct hlsl_block struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_float_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, float f, const struct vkd3d_shader_location *loc); -void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +void hlsl_block_add_if(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, + enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_int_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1704,8 +1713,8 @@ struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *no struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, + struct hlsl_block *else_block, enum hlsl_if_flatten_type flatten_type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, enum hlsl_so_object_type so_type, struct hlsl_type *type); struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 8ec963c8656..92bfd7040fc 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -471,7 +471,7 @@ static void append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co hlsl_block_init(&then_block); hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &condition->loc); - hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, &condition->loc); + hlsl_block_add_if(ctx, cond_block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &condition->loc); } static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) @@ -9139,6 +9139,7 @@ selection_statement: { struct hlsl_ir_node *condition = node_from_block($4); const struct parse_attribute_list *attributes = &$1; + enum hlsl_if_flatten_type flatten_type = HLSL_IF_FLATTEN_DEFAULT; unsigned int i; check_attribute_list_for_duplicates(ctx, attributes); @@ -9147,10 +9148,19 @@ selection_statement: { const struct hlsl_attribute *attr = attributes->attrs[i]; - if (!strcmp(attr->name, "branch") - || !strcmp(attr->name, "flatten")) + if (!strcmp(attr->name, "branch")) { - hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); + if (flatten_type == HLSL_IF_FORCE_FLATTEN) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "The 'branch' and 'flatten' attributes are mutually exclusive."); + flatten_type = HLSL_IF_FORCE_BRANCH; + } + else if (!strcmp(attr->name, "flatten")) + { + if (flatten_type == HLSL_IF_FORCE_BRANCH) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "The 'branch' and 'flatten' attributes are mutually exclusive."); + flatten_type = HLSL_IF_FORCE_FLATTEN; } else { @@ -9158,10 +9168,16 @@ selection_statement: } } + if (flatten_type == HLSL_IF_FORCE_BRANCH && hlsl_version_lt(ctx, 2, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The 'branch' attribute requires shader model 2.1 or higher."); + } + check_condition_type(ctx, condition); condition = add_cast(ctx, $4, condition, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &@4); - hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, &@2); + hlsl_block_add_if(ctx, $4, condition, $6.then_block, $6.else_block, flatten_type, &@2); destroy_block($6.then_block); destroy_block($6.else_block); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 9048214923b..6fe29e5bfed 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -1060,7 +1060,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc); - if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc))) + if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc))) return; list_add_after(&load->node.entry, &iff->entry); } @@ -1245,7 +1245,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun load = hlsl_block_add_simple_load(ctx, block, func->early_return_var, &cf_instr->loc); not = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_LOGIC_NOT, load, &cf_instr->loc); - hlsl_block_add_if(ctx, block, not, &then_block, NULL, &cf_instr->loc); + hlsl_block_add_if(ctx, block, not, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, &cf_instr->loc); } return has_early_return; @@ -3916,6 +3916,320 @@ static bool remove_trivial_conditional_branches(struct hlsl_ctx *ctx, struct hls return true; } +static bool is_conditional_block_simple(const struct hlsl_block *cond_block) +{ + static const unsigned int max_cost = 10; + struct hlsl_ir_node *instr; + unsigned int cost = 0; + + LIST_FOR_EACH_ENTRY(instr, &cond_block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: + break; + + case HLSL_IR_EXPR: + ++cost; + break; + + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ && jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + ++cost; + break; + } + + case HLSL_IR_STORE: + if (hlsl_ir_store(instr)->lhs.var->is_tgsm) + return false; + ++cost; + break; + + case HLSL_IR_LOAD: + if (hlsl_ir_load(instr)->src.var->is_tgsm) + return false; + break; + + default: + return false; + } + + if (cost > max_cost) + return false; + } + + return true; +} + +static bool can_flatten_conditional_block(struct hlsl_ctx *ctx, const struct hlsl_block *cond_block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &cond_block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_CALL: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_INTERLOCKED: + case HLSL_IR_SYNC: + goto fail; + + case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ && jump->type != HLSL_IR_JUMP_DISCARD_NEG) + { + hlsl_fixme(ctx, &instr->loc, "Flattening conditional blocks with non-discard jump instructions."); + return false; + } + break; + } + + case HLSL_IR_STORE: + if (hlsl_ir_store(instr)->lhs.var->is_tgsm) + goto fail; + break; + + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + if (!can_flatten_conditional_block(ctx, &iff->then_block) + || !can_flatten_conditional_block(ctx, &iff->else_block)) + return false; + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + if (!can_flatten_conditional_block(ctx, &loop->iter) + || !can_flatten_conditional_block(ctx, &loop->body)) + return false; + break; + } + + case HLSL_IR_SWITCH: + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + if (!can_flatten_conditional_block(ctx, &c->body)) + return false; + } + break; + } + + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_INDEX: + case HLSL_IR_LOAD: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: + case HLSL_IR_COMPILE: + case HLSL_IR_SAMPLER_STATE: + case HLSL_IR_STATEBLOCK_CONSTANT: + break; + } + } + + return true; + +fail: + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_CANNOT_FLATTEN, + "Conditional branches with side effects cannot be flattened."); + return false; +} + +static bool lower_conditional_block_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *cond, bool is_then) +{ + struct hlsl_ir_node *load, *new_val; + struct hlsl_ir_store *store; + struct hlsl_type *rhs_type; + struct hlsl_block block; + + if (instr->type != HLSL_IR_STORE) + return false; + store = hlsl_ir_store(instr); + rhs_type = store->rhs.node->data_type; + + VKD3D_ASSERT(rhs_type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(cond->data_type->e.numeric.dimx == 1); + + hlsl_block_init(&block); + + load = hlsl_block_add_load_index(ctx, &block, &store->lhs, NULL, &store->node.loc); + + if (store->writemask && !hlsl_types_are_equal(rhs_type, load->data_type)) + load = hlsl_block_add_swizzle(ctx, &block, hlsl_swizzle_from_writemask(store->writemask), + rhs_type->e.numeric.dimx, load, &store->node.loc); + + if (rhs_type->e.numeric.dimx != 1) + cond = hlsl_block_add_swizzle(ctx, &block, HLSL_SWIZZLE(X, X, X, X), + rhs_type->e.numeric.dimx, cond, &store->node.loc); + + if (is_then) + new_val = hlsl_add_conditional(ctx, &block, cond, store->rhs.node, load); + else + new_val = hlsl_add_conditional(ctx, &block, cond, load, store->rhs.node); + + list_move_before(&store->node.entry, &block.instrs); + hlsl_src_remove(&store->rhs); + hlsl_src_from_node(&store->rhs, new_val); + return true; +} + +static bool lower_conditional_block_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *cond, bool is_then) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *discard_cond, *new_cond = NULL; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + discard_cond = jump->condition.node; + + if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) + return false; + + VKD3D_ASSERT(ctx->profile->major_version >= 4); + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL && cond->data_type->e.numeric.dimx == 1); + VKD3D_ASSERT(discard_cond->data_type->e.numeric.dimx == 1); + + hlsl_block_init(&block); + + if (!is_then) + cond = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_LOGIC_NOT, cond, &instr->loc); + discard_cond = hlsl_block_add_cast(ctx, &block, discard_cond, cond->data_type, &instr->loc); + + operands[0] = cond; + operands[1] = discard_cond; + + /* discard_nz (cond && discard_cond) */ + new_cond = hlsl_block_add_expr(ctx, &block, HLSL_OP2_LOGIC_AND, operands, + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &jump->node.loc); + + list_move_before(&jump->node.entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, new_cond); + return true; +} + +static bool lower_conditional_block_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *cond, bool is_then) +{ + struct hlsl_ir_node *discard_cond, *new_cond = NULL; + struct hlsl_constant_value zero_value = {0}; + struct hlsl_ir_node *zero; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + discard_cond = jump->condition.node; + + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + VKD3D_ASSERT(ctx->profile->major_version < 4); + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL && cond->data_type->e.numeric.dimx == 1); + + hlsl_block_init(&block); + + if (!(zero = hlsl_new_constant(ctx, discard_cond->data_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(&block, zero); + + if (zero->data_type->e.numeric.dimx != 1) + cond = hlsl_block_add_swizzle(ctx, &block, HLSL_SWIZZLE(X, X, X, X), + zero->data_type->e.numeric.dimx, cond, &instr->loc); + + if (is_then) + new_cond = hlsl_add_conditional(ctx, &block, cond, discard_cond, zero); + else + new_cond = hlsl_add_conditional(ctx, &block, cond, zero, discard_cond); + + list_move_before(&jump->node.entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, new_cond); + return true; +} + +struct flatten_conditional_block_ctx +{ + struct hlsl_ir_node *cond; + bool is_then; +}; + +static bool lower_conditional_block_instrs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct flatten_conditional_block_ctx *flatten_ctx = context; + + return lower_conditional_block_stores(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then) + || lower_conditional_block_discard_nz(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then) + || lower_conditional_block_discard_neg(ctx, instr, flatten_ctx->cond, flatten_ctx->is_then); +} + +static bool flatten_conditional_branches(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct flatten_conditional_block_ctx flatten_ctx; + struct hlsl_ir_if *iff; + bool force_flatten; + + if (instr->type != HLSL_IR_IF) + return false; + iff = hlsl_ir_if(instr); + + if (iff->flatten_type == HLSL_IF_FORCE_BRANCH) + return false; + + force_flatten = iff->flatten_type == HLSL_IF_FORCE_FLATTEN + || hlsl_version_lt(ctx, 2, 1); /* Always flatten branches for SM < 2.1. */ + + if (force_flatten) + { + if (!can_flatten_conditional_block(ctx, &iff->then_block) + || !can_flatten_conditional_block(ctx, &iff->else_block)) + return false; + } + else if (!is_conditional_block_simple(&iff->then_block) || !is_conditional_block_simple(&iff->else_block)) + { + /* Only flatten simple blocks by default. */ + return false; + } + + flatten_ctx.cond = iff->condition.node; + + flatten_ctx.is_then = true; + hlsl_transform_ir(ctx, lower_conditional_block_instrs, &iff->then_block, &flatten_ctx); + + flatten_ctx.is_then = false; + hlsl_transform_ir(ctx, lower_conditional_block_instrs, &iff->else_block, &flatten_ctx); + + list_move_before(&instr->entry, &iff->then_block.instrs); + list_move_before(&instr->entry, &iff->else_block.instrs); + list_remove(&instr->entry); + hlsl_free_instr(instr); + return true; +} + static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_switch_case *c, *def = NULL; @@ -5462,6 +5776,35 @@ static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return true; } +static bool cast_discard_neg_conditions_to_vec4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *swizzle; + struct hlsl_ir_jump *jump; + struct hlsl_block block; + unsigned int dimx; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + dimx = jump->condition.node->data_type->e.numeric.dimx; + if (dimx == 4) + return false; + + hlsl_block_init(&block); + + swizzle = hlsl_block_add_swizzle(ctx, &block, hlsl_swizzle_from_writemask((1 << dimx) - 1), 4, + jump->condition.node, &instr->loc); + + list_move_before(&instr->entry, &block.instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, swizzle); + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -8465,6 +8808,7 @@ static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *bod progress |= replace_ir(ctx, fold_swizzle_chains, body); progress |= replace_ir(ctx, fold_trivial_swizzles, body); progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + progress |= hlsl_transform_ir(ctx, flatten_conditional_branches, body, NULL); } while (progress); replace_ir(ctx, fold_redundant_casts, body); } @@ -9996,6 +10340,8 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) { + VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 4); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_TEXKILL, 0, 1))) return; @@ -10016,11 +10362,9 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program struct hlsl_ir_node *instr = &iff->node; struct vkd3d_shader_instruction *ins; - if (hlsl_version_lt(ctx, 2, 1)) - { - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - return; - } + /* Conditional branches should have already been flattened for SM < 2.1. */ + VKD3D_ASSERT(hlsl_version_ge(ctx, 2, 1)); + VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1); if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_IFC, 0, 2))) @@ -13835,7 +14179,7 @@ static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx load = hlsl_block_add_simple_load(ctx, dst, var, loc); cond = hlsl_block_add_unary_expr(ctx, dst, HLSL_OP1_LOGIC_NOT, load, loc); - if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc))) + if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, HLSL_IF_FLATTEN_DEFAULT, loc))) return NULL; hlsl_block_add_instr(dst, iff); @@ -14815,6 +15159,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v { while (replace_ir(ctx, lower_nonconstant_array_loads, body)); + hlsl_transform_ir(ctx, cast_discard_neg_conditions_to_vec4, body, NULL); + replace_ir(ctx, lower_ternary, body); replace_ir(ctx, lower_int_modulus_sm1, body); replace_ir(ctx, lower_division, body); @@ -14847,6 +15193,14 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v hlsl_run_folding_passes(ctx, body); + if (profile->major_version < 4) + { + /* Ternary operations can be potentially introduced by hlsl_run_folding_passes(). */ + replace_ir(ctx, lower_ternary, body); + if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) + replace_ir(ctx, lower_cmp, body); + } + do compute_liveness(ctx, body); while (hlsl_transform_ir(ctx, dce, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index c974c9e532b..d34133d6d4c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -226,14 +226,22 @@ static bool msl_get_binding(const struct msl_generator *gen, const struct vkd3d_ continue; if (binding->register_space != descriptor->register_space) continue; - if (binding->register_index != register_idx) + if (binding->register_index > descriptor->register_index) + continue; + if (descriptor->count != ~0u && binding->binding.count < descriptor->count) + continue; + if (descriptor->count != ~0u + && binding->binding.count - descriptor->count < descriptor->register_index - binding->register_index) + continue; + if (descriptor->count == ~0u + && binding->binding.count <= descriptor->register_index - binding->register_index) continue; if (!msl_check_shader_visibility(gen, binding->shader_visibility)) continue; if ((binding->flags & flags) != flags) continue; - *idx = binding->binding.binding; + *idx = register_idx + binding->binding.binding - binding->register_index; return true; } @@ -416,6 +424,11 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu msl_print_subscript(buffer, gen, reg->idx[2].rel_addr, reg->idx[2].offset); return MSL_DATA_UNION; + case VKD3DSPR_IMMCONSTBUFFER: + vkd3d_string_buffer_printf(buffer, "icb%u", reg->idx[0].offset); + msl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); + return MSL_DATA_UINT; + case VKD3DSPR_IDXTEMP: vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); msl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); @@ -445,6 +458,24 @@ static enum msl_data_type msl_print_register_name(struct vkd3d_string_buffer *bu vkd3d_string_buffer_printf(buffer, "v_local_thread_index"); return MSL_DATA_UNION; + case VKD3DSPR_UNDEF: + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: + vkd3d_string_buffer_printf(buffer, "0u"); + return MSL_DATA_UINT; + + case VSIR_DIMENSION_VEC4: + vkd3d_string_buffer_printf(buffer, "uint4(0u, 0u, 0u, 0u)"); + return MSL_DATA_UINT; + + default: + vkd3d_string_buffer_printf(buffer, "", reg->dimension); + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled dimension %#x.", reg->dimension); + return MSL_DATA_UINT; + } + default: msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); @@ -667,6 +698,28 @@ static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruc msl_dst_cleanup(&dst, &gen->string_buffers); } +static void msl_firstbit(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const char *op = ins->opcode == VSIR_OP_FIRSTBIT_LO ? "ctz" : "clz"; + unsigned int mask_size; + struct msl_src src; + struct msl_dst dst; + uint32_t mask; + + mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); + msl_src_init(&src, gen, &ins->src[0], mask); + + if ((mask_size = vsir_write_mask_component_count(mask)) > 1) + msl_print_assignment(gen, &dst, "select(uint%u(0xffffffffu), %s(%s), bool%u(%s))", + mask_size, op, src.str->buffer, mask_size, src.str->buffer); + else + msl_print_assignment(gen, &dst, "%s ? %s(%s) : 0xffffffffu", + src.str->buffer, op, src.str->buffer); + + msl_src_cleanup(&src, &gen->string_buffers); + msl_dst_cleanup(&dst, &gen->string_buffers); +} + static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) { struct vkd3d_string_buffer *args; @@ -1336,6 +1389,46 @@ static void msl_dcl_indexable_temp(struct msl_generator *gen, const struct vkd3d ins->declaration.indexable_temp.register_size); } +static void msl_barrier(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + uint32_t flags = ins->flags; + + if (flags & (VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP_UAV)) + { + const char *scope = flags & VKD3DSSF_GLOBAL_UAV ? "thread_scope_device" : "thread_scope_threadgroup"; + const char *mem_flags = "mem_flags::mem_device | mem_flags::mem_texture"; + + if (flags & VKD3DSSF_GROUP_SHARED_MEMORY) + { + mem_flags = "mem_flags::mem_device | mem_flags::mem_texture | mem_flags::mem_threadgroup"; + flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY; + } + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, + "atomic_thread_fence(%s, memory_order_seq_cst, %s);\n", mem_flags, scope); + flags &= ~(VKD3DSSF_GLOBAL_UAV | VKD3DSSF_THREAD_GROUP_UAV); + } + else if (flags & VKD3DSSF_GROUP_SHARED_MEMORY) + { + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, + "atomic_thread_fence(mem_flags::mem_threadgroup, memory_order_seq_cst, thread_scope_threadgroup);\n"); + flags &= ~VKD3DSSF_GROUP_SHARED_MEMORY; + } + + if (flags & VKD3DSSF_THREAD_GROUP) + { + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "threadgroup_barrier(mem_flags::mem_none);\n"); + flags &= ~VKD3DSSF_THREAD_GROUP; + } + + if (flags) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled synchronisation flags %#x.", flags); +} + static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) { gen->location = ins->location; @@ -1373,6 +1466,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_COS: msl_intrinsic(gen, ins, "cos"); break; + case VSIR_OP_COUNTBITS: + msl_intrinsic(gen, ins, "popcount"); + break; case VSIR_OP_DCL_INDEXABLE_TEMP: msl_dcl_indexable_temp(gen, ins); break; @@ -1425,6 +1521,10 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_EXP: msl_intrinsic(gen, ins, "exp2"); break; + case VSIR_OP_FIRSTBIT_HI: + case VSIR_OP_FIRSTBIT_LO: + msl_firstbit(gen, ins); + break; case VSIR_OP_FRC: msl_intrinsic(gen, ins, "fract"); break; @@ -1570,6 +1670,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d case VSIR_OP_SWITCH: msl_switch(gen, ins); break; + case VSIR_OP_SYNC: + msl_barrier(gen, ins); + break; case VSIR_OP_TAN: msl_intrinsic(gen, ins, "tan"); break; @@ -1857,6 +1960,59 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) vkd3d_string_buffer_printf(buffer, "};\n\n"); } +static void msl_generate_immediate_constant_buffers(struct msl_generator *gen) +{ + const struct vkd3d_shader_immediate_constant_buffer *icb; + size_t i, j; + + for (i = 0; i < gen->program->icb_count; ++i) + { + icb = gen->program->icbs[i]; + + if (data_type_is_64_bit(icb->data_type)) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Immediate constant buffer %zu has unhandled data type \"%s\" (%#x).", + i, vsir_data_type_get_name(icb->data_type, ""), icb->data_type); + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "constant %s icb%u[%u] =", + icb->component_count == 4 ? "uint4" : "uint", icb->register_idx, icb->element_count); + + if (icb->is_null || data_type_is_64_bit(icb->data_type)) + { + vkd3d_string_buffer_printf(gen->buffer, " {};\n\n"); + continue; + } + + vkd3d_string_buffer_printf(gen->buffer, "\n"); + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "{\n"); + + if (icb->component_count == 4) + { + for (j = 0; j < icb->element_count; ++j) + { + msl_print_indent(gen->buffer, gen->indent + 1); + vkd3d_string_buffer_printf(gen->buffer, "{0x%08x, 0x%08x, 0x%08x, 0x%08x},\n", + icb->data[4 * j + 0], icb->data[4 * j + 1], icb->data[4 * j + 2], icb->data[4 * j + 3]); + } + } + else + { + for (j = 0; j < icb->element_count; ++j) + { + if (!(j & 3)) + msl_print_indent(gen->buffer, gen->indent + 1); + vkd3d_string_buffer_printf(gen->buffer, "0x%08x,%s", icb->data[j], + j == icb->element_count - 1 || (j & 3) == 3 ? "\n" : " "); + } + } + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "};\n\n"); + } +} + static void msl_generate_entrypoint_prologue(struct msl_generator *gen) { const struct shader_signature *signature = &gen->program->input_signature; @@ -2102,8 +2258,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); - vkd3d_string_buffer_printf(gen->buffer, "#include \n"); - vkd3d_string_buffer_printf(gen->buffer, "#include \n\n"); + vkd3d_string_buffer_printf(gen->buffer, "#include \n"); vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); if (gen->program->global_flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL)) @@ -2144,6 +2299,7 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader msl_generate_input_struct_declarations(gen); msl_generate_output_struct_declarations(gen); + msl_generate_immediate_constant_buffers(gen); vkd3d_string_buffer_printf(gen->buffer, "static void %s_main(thread vkd3d_vec4 *v, " diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 404e7cec6ea..763a4906919 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -175,6 +175,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE = 5043, VKD3D_SHADER_ERROR_HLSL_MISPLACED_STREAM_OUTPUT = 5044, VKD3D_SHADER_ERROR_HLSL_MISSING_INPUT_PATCH = 5045, + VKD3D_SHADER_ERROR_HLSL_CANNOT_FLATTEN = 5046, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -- 2.51.0