vkd3d-shader/hlsl: Introduce a compiler pass to vectorize expressions.

Elizabeth Figura 2024-10-30 11:51:07 -05:00 committed by Henri Verbeet
parent 7b4a29da81
commit 1a999f74fc
Notes: Henri Verbeet 2025-04-03 20:34:10 +02:00
Approved-by: Francisco Casas (@fcasas)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1443

@@ -2491,6 +2491,231 @@ enum validation_result
    DEREF_VALIDATION_NOT_CONSTANT,
};

struct vectorize_exprs_state
{
    struct vectorizable_exprs_group
    {
        struct hlsl_block *block;
        struct hlsl_ir_expr *exprs[4];
        uint8_t expr_count, component_count;
    } *groups;
    size_t count, capacity;
};

static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b)
{
    /* TODO: We can also vectorize different constants. */

    if (a->type == HLSL_IR_SWIZZLE)
        a = hlsl_ir_swizzle(a)->val.node;
    if (b->type == HLSL_IR_SWIZZLE)
        b = hlsl_ir_swizzle(b)->val.node;

    return a == b;
}

static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b)
{
    if (a->op != b->op)
        return false;

    for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j)
    {
        if (!a->operands[j].node)
            break;
        if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node))
            return false;
    }

    return true;
}

static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
        struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state)
{
    if (expr->node.data_type->class > HLSL_CLASS_VECTOR)
        return;

    /* These are the only current ops that are not per-component. */
    if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED
            || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD)
        return;

    for (size_t i = 0; i < state->count; ++i)
    {
        struct vectorizable_exprs_group *group = &state->groups[i];
        struct hlsl_ir_expr *other = group->exprs[0];

        /* These are SSA instructions, which means they have the same value
         * regardless of what block they're in. However, being in different
         * blocks may mean that one expression or the other is not always
         * executed. */
        if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4
                && group->block == block
                && is_same_vectorizable_expr(expr, other))
        {
            group->exprs[group->expr_count++] = expr;
            group->component_count += expr->node.data_type->e.numeric.dimx;
            return;
        }
    }

    if (!hlsl_array_reserve(ctx, (void **)&state->groups,
            &state->capacity, state->count + 1, sizeof(*state->groups)))
        return;

    state->groups[state->count].block = block;
    state->groups[state->count].exprs[0] = expr;
    state->groups[state->count].expr_count = 1;
    state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx;
    ++state->count;
}

static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
        struct vectorize_exprs_state *state)
{
    struct hlsl_ir_node *instr;

    LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
    {
        if (instr->type == HLSL_IR_EXPR)
        {
            record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state);
        }
        else if (instr->type == HLSL_IR_IF)
        {
            struct hlsl_ir_if *iff = hlsl_ir_if(instr);

            find_vectorizable_expr_groups(ctx, &iff->then_block, state);
            find_vectorizable_expr_groups(ctx, &iff->else_block, state);
        }
        else if (instr->type == HLSL_IR_LOOP)
        {
            find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state);
        }
        else if (instr->type == HLSL_IR_SWITCH)
        {
            struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
            struct hlsl_ir_switch_case *c;

            LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
                find_vectorizable_expr_groups(ctx, &c->body, state);
        }
    }
}

/* Combine sequences like
 *
 * 3: @1.x
 * 4: @2.x
 * 5: @3 * @4
 * 6: @1.y
 * 7: @2.x
 * 8: @6 * @7
 *
 * into
 *
 * 5_1: @1.xy
 * 5_2: @2.xx
 * 5_3: @5_1 * @5_2
 * 5:   @5_3.x
 * 8:   @5_3.y
 *
 * Each operand to an expression needs to refer to the same ultimate source
 * (in this case @1 and @2 respectively), but can be a swizzle thereof.
 *
 * In practice the swizzles @5 and @8 can generally then be vectorized again,
 * either as part of another expression, or as part of a store.
 */
static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block)
{
    struct vectorize_exprs_state state = {0};
    bool progress = false;

    find_vectorizable_expr_groups(ctx, block, &state);

    for (unsigned int i = 0; i < state.count; ++i)
    {
        struct vectorizable_exprs_group *group = &state.groups[i];
        struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
        uint32_t swizzles[HLSL_MAX_OPERANDS] = {0};
        struct hlsl_ir_node *arg, *combined;
        unsigned int component_count = 0;
        struct hlsl_type *combined_type;
        struct hlsl_block new_block;
        struct hlsl_ir_expr *expr;

        if (group->expr_count == 1)
            continue;

        hlsl_block_init(&new_block);

        for (unsigned int j = 0; j < group->expr_count; ++j)
        {
            expr = group->exprs[j];

            for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
            {
                uint32_t arg_swizzle;

                if (!(arg = expr->operands[a].node))
                    break;

                if (arg->type == HLSL_IR_SWIZZLE)
                    arg_swizzle = hlsl_ir_swizzle(arg)->u.vector;
                else
                    arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W);

                /* Mask out the invalid components. */
                arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1;
                swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count);
            }

            component_count += expr->node.data_type->e.numeric.dimx;
        }

        expr = group->exprs[0];
        for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
        {
            if (!(arg = expr->operands[a].node))
                break;
            if (arg->type == HLSL_IR_SWIZZLE)
                arg = hlsl_ir_swizzle(arg)->val.node;
            args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc);
        }

        combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count);
        combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc);

        list_move_before(&expr->node.entry, &new_block.instrs);

        TRACE("Combining %u %s instructions into %p.\n", group->expr_count,
                debug_hlsl_expr_op(group->exprs[0]->op), combined);

        component_count = 0;
        for (unsigned int j = 0; j < group->expr_count; ++j)
        {
            struct hlsl_ir_node *replacement;

            expr = group->exprs[j];

            if (!(replacement = hlsl_new_swizzle(ctx,
                    HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count),
                    expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc)))
                goto out;

            component_count += expr->node.data_type->e.numeric.dimx;
            list_add_before(&expr->node.entry, &replacement->entry);
            hlsl_replace_node(&expr->node, replacement);
        }

        progress = true;
    }

out:
    vkd3d_free(state.groups);
    return progress;
}

static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx,
        const struct hlsl_deref *deref)
{
@@ -12589,6 +12814,7 @@ static void process_entry_function(struct hlsl_ctx *ctx,
    struct recursive_call_ctx recursive_call_ctx;
    struct hlsl_ir_var *var;
    unsigned int i;
    bool progress;

    ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func;
@@ -12783,6 +13009,16 @@ static void process_entry_function(struct hlsl_ctx *ctx,
    hlsl_transform_ir(ctx, lower_separate_samples, body, NULL);

    hlsl_transform_ir(ctx, validate_dereferences, body, NULL);

    do
    {
        progress = vectorize_exprs(ctx, body);
        compute_liveness(ctx, entry_func);
        progress |= hlsl_transform_ir(ctx, dce, body, NULL);
        progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
        progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
    } while (progress);

    hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL);

    if (hlsl_version_ge(ctx, 4, 0))
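
To make the swizzle bookkeeping in vectorize_exprs() easier to follow, here is a small standalone sketch. It is not part of the commit, and it assumes the one-byte-per-component swizzle encoding implied by the VKD3D_SHADER_SWIZZLE_SHIFT masking above (an identity .xyzw swizzle encoded as 0x03020100); the SWIZZLE_* macros and the pack_swizzle() helper are local stand-ins, not vkd3d definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins, not vkd3d's definitions: one byte per destination
 * component, so an identity .xyzw swizzle encodes as 0x03020100. */
#define SWIZZLE_SHIFT(idx)  (8u * (idx))
#define SWIZZLE_XYZW        0x03020100u

/* Append a dimx-component operand swizzle to the combined operand swizzle at
 * destination component "offset", mirroring the packing loop in
 * vectorize_exprs(). Assumes dimx < 4, as is the case for grouped scalars. */
static uint32_t pack_swizzle(uint32_t combined, uint32_t arg_swizzle,
        unsigned int dimx, unsigned int offset)
{
    arg_swizzle &= (1u << SWIZZLE_SHIFT(dimx)) - 1; /* Drop unused components. */
    return combined | (arg_swizzle << SWIZZLE_SHIFT(offset));
}

int main(void)
{
    uint32_t swizzle = 0;

    /* Two scalar multiplies whose first operands are @1.x and @1.y. */
    swizzle = pack_swizzle(swizzle, 0x00u /* .x */, 1, 0);
    swizzle = pack_swizzle(swizzle, 0x01u /* .y */, 1, 1);
    assert(swizzle == 0x0100u); /* The combined operand reads @1.xy. */

    /* Splitting the combined result: the expression at component offset 1
     * gets the identity swizzle shifted right by one component, so its low
     * byte selects component Y of the combined instruction. */
    assert(((SWIZZLE_XYZW >> SWIZZLE_SHIFT(1)) & 0xffu) == 0x01u);

    printf("combined operand swizzle: %#x\n", (unsigned int)swizzle);
    return 0;
}

Splitting thus works the same way in reverse: each grouped expression's replacement swizzle is the identity swizzle shifted right by the number of components already consumed, so every original instruction ends up reading its own lane of the combined result, and the swizzle-folding passes in the driver loop above can then clean those reads up.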