From 351d58a95b32b9b3896d8ca7c446e574508c7684 Mon Sep 17 00:00:00 2001 From: Victor Chiletto Date: Wed, 16 Oct 2024 17:19:36 -0300 Subject: [PATCH] vkd3d-shader/hlsl: Partially defer continue resolution. We need 'for' iter blocks to be separate for loop unrolling. --- libs/vkd3d-shader/hlsl.c | 20 +++++-- libs/vkd3d-shader/hlsl.h | 14 ++++- libs/vkd3d-shader/hlsl.y | 43 +++++--------- libs/vkd3d-shader/hlsl_codegen.c | 98 ++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 36 deletions(-) diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index e7518404..3a84afed 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2064,7 +2064,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return &jump->node; } -struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { @@ -2076,6 +2076,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); + hlsl_block_init(&loop->iter); + if (iter) + hlsl_block_add_block(&loop->iter, iter); + loop->unroll_type = unroll_type; loop->unroll_limit = unroll_limit; return &loop->node; @@ -2231,14 +2235,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { + struct hlsl_block iter, body; struct hlsl_ir_node *dst; - struct hlsl_block body; - if (!clone_block(ctx, &body, &src->body, map)) + if (!clone_block(ctx, &iter, &src->iter, map)) return NULL; - if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + if (!clone_block(ctx, &body, &src->body, map)) { + hlsl_block_cleanup(&iter); + 
return NULL; + } + + if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + { + hlsl_block_cleanup(&iter); hlsl_block_cleanup(&body); return NULL; } @@ -3713,6 +3724,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct hlsl_ir_loop *loop) { hlsl_block_cleanup(&loop->body); + hlsl_block_cleanup(&loop->iter); vkd3d_free(loop); } diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 0a20acd9..d226273e 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -666,11 +666,20 @@ enum hlsl_ir_loop_unroll_type HLSL_IR_LOOP_FORCE_LOOP }; +enum hlsl_loop_type +{ + HLSL_LOOP_FOR, + HLSL_LOOP_WHILE, + HLSL_LOOP_DO_WHILE +}; + struct hlsl_ir_loop { struct hlsl_ir_node node; + struct hlsl_block iter; /* loop condition is stored in the body (as "if (!condition) break;") */ struct hlsl_block body; + enum hlsl_loop_type type; unsigned int next_index; /* liveness index of the end of the loop */ unsigned int unroll_limit; enum hlsl_ir_loop_unroll_type unroll_type; @@ -1550,8 +1559,9 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct 
hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index afa41f4b..925f01ef 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -555,13 +555,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co return true; } -enum loop_type -{ - LOOP_FOR, - LOOP_WHILE, - LOOP_DO_WHILE -}; - static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) { unsigned int i, j; @@ -577,8 +570,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru } } -static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, - struct hlsl_block *cond, struct hlsl_block *iter) +static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_loop_type type, struct hlsl_block *cond) { struct hlsl_ir_node *instr, *next; @@ -588,8 +581,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - resolve_loop_continue(ctx, &iff->then_block, type, cond, iter); - resolve_loop_continue(ctx, &iff->else_block, type, cond, iter); + resolve_loop_continue(ctx, &iff->then_block, type, cond); + resolve_loop_continue(ctx, &iff->else_block, type, cond); } else if (instr->type == HLSL_IR_JUMP) { @@ -599,7 +592,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) continue; - if (type == LOOP_DO_WHILE) + if (type == HLSL_LOOP_DO_WHILE) { if (!hlsl_clone_block(ctx, &cond_block, cond)) return; @@ -610,13 +603,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block } list_move_before(&instr->entry, &cond_block.instrs); } - else if (type == LOOP_FOR) - { - if (!hlsl_clone_block(ctx, &cond_block, iter)) - return; - 
list_move_before(&instr->entry, &cond_block.instrs); - } - jump->type = HLSL_IR_JUMP_CONTINUE; } } } @@ -740,7 +726,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str return res.number.u; } -static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { @@ -792,7 +778,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, } } - resolve_loop_continue(ctx, body, type, cond, iter); + resolve_loop_continue(ctx, body, type, cond); if (!init && !(init = make_empty_block(ctx))) goto oom; @@ -800,15 +786,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, if (!append_conditional_break(ctx, cond)) goto oom; - if (iter) - hlsl_block_add_block(body, iter); - - if (type == LOOP_DO_WHILE) + if (type == HLSL_LOOP_DO_WHILE) list_move_tail(&body->instrs, &cond->instrs); else list_move_head(&body->instrs, &cond->instrs); - if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) + if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc))) goto oom; hlsl_block_add_instr(init, loop); @@ -8831,25 +8814,25 @@ if_body: loop_statement: attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement { - $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); + $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' { - $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); + $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); hlsl_pop_scope(ctx); 
cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); hlsl_pop_scope(ctx); cleanup_parse_attribute_list(&$1); } diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index c31e35f8..44d3e388 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1954,6 +1954,7 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l bool progress = false; copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); + copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index); copy_propagation_push_scope(state, ctx); progress |= copy_propagation_transform_block(ctx, &loop->body, state); @@ -10177,6 +10178,101 @@ static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void * return true; } +/* We could handle this at parse time. However, loop unrolling often needs to + * know the value of variables modified in the "iter" block. It is possible to + * detect that all exit paths of a loop body modify such variables in the same + * way, but difficult, and d3dcompiler does not attempt to do so. 
+ * In fact, d3dcompiler is capable of unrolling the following loop:
+ * for (int i = 0; i < 10; ++i)
+ * {
+ *     if (some_uniform > 4)
+ *         continue;
+ * }
+ * but cannot unroll the same loop with "++i" moved to each exit path:
+ * for (int i = 0; i < 10;)
+ * {
+ *     if (some_uniform > 4)
+ *     {
+ *         ++i;
+ *         continue;
+ *     }
+ *     ++i;
+ * }
+ */
+static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
+{
+    struct hlsl_ir_loop *loop;
+
+    if (node->type != HLSL_IR_LOOP)
+        return true;
+
+    loop = hlsl_ir_loop(node);
+
+    hlsl_block_add_block(&loop->body, &loop->iter);
+    return true;
+}
+
+/* Turn every HLSL_IR_JUMP_UNRESOLVED_CONTINUE under "block" into HLSL_IR_JUMP_CONTINUE. In a "for" loop, a
+ * clone of the innermost loop's iter block is first inserted before the jump. "last_loop" may be NULL. */
+static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop)
+{
+    struct hlsl_ir_node *node;
+
+    LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
+    {
+        switch (node->type)
+        {
+            case HLSL_IR_LOOP:
+            {
+                struct hlsl_ir_loop *loop = hlsl_ir_loop(node);
+
+                resolve_continues(ctx, &loop->body, loop);
+                break;
+            }
+            case HLSL_IR_IF:
+            {
+                struct hlsl_ir_if *iff = hlsl_ir_if(node);
+                resolve_continues(ctx, &iff->then_block, last_loop);
+                resolve_continues(ctx, &iff->else_block, last_loop);
+                break;
+            }
+            case HLSL_IR_SWITCH:
+            {
+                struct hlsl_ir_switch *s = hlsl_ir_switch(node);
+                struct hlsl_ir_switch_case *c;
+
+                LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
+                    resolve_continues(ctx, &c->body, last_loop);
+                break;
+            }
+            case HLSL_IR_JUMP:
+            {
+                struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
+
+                if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
+                    break;
+
+                /* "last_loop" is NULL when the jump is not nested in any loop. */
+                if (last_loop && last_loop->type == HLSL_LOOP_FOR)
+                {
+                    struct hlsl_block draft;
+
+                    if (!hlsl_clone_block(ctx, &draft, &last_loop->iter))
+                        return;
+
+                    list_move_before(&node->entry, &draft.instrs);
+                    hlsl_block_cleanup(&draft);
+                }
+
+                jump->type = HLSL_IR_JUMP_CONTINUE;
+                break;
+            }
+            default:
+                break;
+        }
+    }
+}
+
 static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
 {
     bool progress;
@@ -10190,6 +10286,8 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc } while (progress); hlsl_transform_ir(ctx, split_matrix_copies, block, NULL); + resolve_continues(ctx, block, NULL); + hlsl_transform_ir(ctx, resolve_loops, block, NULL); hlsl_transform_ir(ctx, unroll_loops, block, block); }