mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-04-13 05:43:18 -07:00
vkd3d-shader/hlsl: Introduce a compiler pass to vectorize expressions.
This commit is contained in:
parent
7b4a29da81
commit
1a999f74fc
Notes:
Henri Verbeet
2025-04-03 20:34:10 +02:00
Approved-by: Francisco Casas (@fcasas) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1443
@ -2491,6 +2491,231 @@ enum validation_result
|
|||||||
DEREF_VALIDATION_NOT_CONSTANT,
|
DEREF_VALIDATION_NOT_CONSTANT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Bookkeeping for the expression vectorization pass: a growable array of
 * groups, where each group collects expressions that are candidates for being
 * merged into a single wider expression. */
struct vectorize_exprs_state
{
    struct vectorizable_exprs_group
    {
        /* Block that all expressions of this group were found in. */
        struct hlsl_block *block;
        /* Member expressions. Four entries suffice, because the combined
         * component count of a group is never allowed to exceed 4. */
        struct hlsl_ir_expr *exprs[4];
        /* Number of valid entries in exprs[]. */
        uint8_t expr_count;
        /* Sum of the component counts (dimx) of the member expressions. */
        uint8_t component_count;
    } *groups;
    /* Number of groups currently in use. */
    size_t count;
    /* Capacity bookkeeping for "groups", updated through hlsl_array_reserve(). */
    size_t capacity;
};
|
||||||
|
|
||||||
|
static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b)
|
||||||
|
{
|
||||||
|
/* TODO: We can also vectorize different constants. */
|
||||||
|
|
||||||
|
if (a->type == HLSL_IR_SWIZZLE)
|
||||||
|
a = hlsl_ir_swizzle(a)->val.node;
|
||||||
|
if (b->type == HLSL_IR_SWIZZLE)
|
||||||
|
b = hlsl_ir_swizzle(b)->val.node;
|
||||||
|
|
||||||
|
return a == b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b)
|
||||||
|
{
|
||||||
|
if (a->op != b->op)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j)
|
||||||
|
{
|
||||||
|
if (!a->operands[j].node)
|
||||||
|
break;
|
||||||
|
if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||||
|
struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state)
|
||||||
|
{
|
||||||
|
if (expr->node.data_type->class > HLSL_CLASS_VECTOR)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* These are the only current ops that are not per-component. */
|
||||||
|
if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED
|
||||||
|
|| expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < state->count; ++i)
|
||||||
|
{
|
||||||
|
struct vectorizable_exprs_group *group = &state->groups[i];
|
||||||
|
struct hlsl_ir_expr *other = group->exprs[0];
|
||||||
|
|
||||||
|
/* These are SSA instructions, which means they have the same value
|
||||||
|
* regardless of what block they're in. However, being in different
|
||||||
|
* blocks may mean that one expression or the other is not always
|
||||||
|
* executed. */
|
||||||
|
|
||||||
|
if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4
|
||||||
|
&& group->block == block
|
||||||
|
&& is_same_vectorizable_expr(expr, other))
|
||||||
|
{
|
||||||
|
group->exprs[group->expr_count++] = expr;
|
||||||
|
group->component_count += expr->node.data_type->e.numeric.dimx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hlsl_array_reserve(ctx, (void **)&state->groups,
|
||||||
|
&state->capacity, state->count + 1, sizeof(*state->groups)))
|
||||||
|
return;
|
||||||
|
state->groups[state->count].block = block;
|
||||||
|
state->groups[state->count].exprs[0] = expr;
|
||||||
|
state->groups[state->count].expr_count = 1;
|
||||||
|
state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx;
|
||||||
|
++state->count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||||
|
struct vectorize_exprs_state *state)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_node *instr;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
|
||||||
|
{
|
||||||
|
if (instr->type == HLSL_IR_EXPR)
|
||||||
|
{
|
||||||
|
record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state);
|
||||||
|
}
|
||||||
|
else if (instr->type == HLSL_IR_IF)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
|
||||||
|
|
||||||
|
find_vectorizable_expr_groups(ctx, &iff->then_block, state);
|
||||||
|
find_vectorizable_expr_groups(ctx, &iff->else_block, state);
|
||||||
|
}
|
||||||
|
else if (instr->type == HLSL_IR_LOOP)
|
||||||
|
{
|
||||||
|
find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state);
|
||||||
|
}
|
||||||
|
else if (instr->type == HLSL_IR_SWITCH)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
|
||||||
|
struct hlsl_ir_switch_case *c;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
|
||||||
|
find_vectorizable_expr_groups(ctx, &c->body, state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Combine sequences like
|
||||||
|
*
|
||||||
|
* 3: @1.x
|
||||||
|
* 4: @2.x
|
||||||
|
* 5: @3 * @4
|
||||||
|
* 6: @1.y
|
||||||
|
* 7: @2.x
|
||||||
|
* 8: @6 * @7
|
||||||
|
*
|
||||||
|
* into
|
||||||
|
*
|
||||||
|
* 5_1: @1.xy
|
||||||
|
* 5_2: @2.xx
|
||||||
|
* 5_3: @5_1 * @5_2
|
||||||
|
* 5: @5_3.x
|
||||||
|
* 8: @5_3.y
|
||||||
|
*
|
||||||
|
* Each operand to an expression needs to refer to the same ultimate source
|
||||||
|
* (in this case @1 and @2 respectively), but can be a swizzle thereof.
|
||||||
|
*
|
||||||
|
* In practice the swizzles @5 and @8 can generally then be vectorized again,
|
||||||
|
* either as part of another expression, or as part of a store.
|
||||||
|
*/
|
||||||
|
static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block)
|
||||||
|
{
|
||||||
|
struct vectorize_exprs_state state = {0};
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
find_vectorizable_expr_groups(ctx, block, &state);
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < state.count; ++i)
|
||||||
|
{
|
||||||
|
struct vectorizable_exprs_group *group = &state.groups[i];
|
||||||
|
struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
|
||||||
|
uint32_t swizzles[HLSL_MAX_OPERANDS] = {0};
|
||||||
|
struct hlsl_ir_node *arg, *combined;
|
||||||
|
unsigned int component_count = 0;
|
||||||
|
struct hlsl_type *combined_type;
|
||||||
|
struct hlsl_block new_block;
|
||||||
|
struct hlsl_ir_expr *expr;
|
||||||
|
|
||||||
|
if (group->expr_count == 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
hlsl_block_init(&new_block);
|
||||||
|
|
||||||
|
for (unsigned int j = 0; j < group->expr_count; ++j)
|
||||||
|
{
|
||||||
|
expr = group->exprs[j];
|
||||||
|
|
||||||
|
for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
|
||||||
|
{
|
||||||
|
uint32_t arg_swizzle;
|
||||||
|
|
||||||
|
if (!(arg = expr->operands[a].node))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (arg->type == HLSL_IR_SWIZZLE)
|
||||||
|
arg_swizzle = hlsl_ir_swizzle(arg)->u.vector;
|
||||||
|
else
|
||||||
|
arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W);
|
||||||
|
|
||||||
|
/* Mask out the invalid components. */
|
||||||
|
arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1;
|
||||||
|
swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
component_count += expr->node.data_type->e.numeric.dimx;
|
||||||
|
}
|
||||||
|
|
||||||
|
expr = group->exprs[0];
|
||||||
|
for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
|
||||||
|
{
|
||||||
|
if (!(arg = expr->operands[a].node))
|
||||||
|
break;
|
||||||
|
if (arg->type == HLSL_IR_SWIZZLE)
|
||||||
|
arg = hlsl_ir_swizzle(arg)->val.node;
|
||||||
|
args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count);
|
||||||
|
combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc);
|
||||||
|
|
||||||
|
list_move_before(&expr->node.entry, &new_block.instrs);
|
||||||
|
|
||||||
|
TRACE("Combining %u %s instructions into %p.\n", group->expr_count,
|
||||||
|
debug_hlsl_expr_op(group->exprs[0]->op), combined);
|
||||||
|
|
||||||
|
component_count = 0;
|
||||||
|
for (unsigned int j = 0; j < group->expr_count; ++j)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_node *replacement;
|
||||||
|
|
||||||
|
expr = group->exprs[j];
|
||||||
|
|
||||||
|
if (!(replacement = hlsl_new_swizzle(ctx,
|
||||||
|
HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count),
|
||||||
|
expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc)))
|
||||||
|
goto out;
|
||||||
|
component_count += expr->node.data_type->e.numeric.dimx;
|
||||||
|
list_add_before(&expr->node.entry, &replacement->entry);
|
||||||
|
hlsl_replace_node(&expr->node, replacement);
|
||||||
|
}
|
||||||
|
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
vkd3d_free(state.groups);
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx,
|
static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx,
|
||||||
const struct hlsl_deref *deref)
|
const struct hlsl_deref *deref)
|
||||||
{
|
{
|
||||||
@ -12589,6 +12814,7 @@ static void process_entry_function(struct hlsl_ctx *ctx,
|
|||||||
struct recursive_call_ctx recursive_call_ctx;
|
struct recursive_call_ctx recursive_call_ctx;
|
||||||
struct hlsl_ir_var *var;
|
struct hlsl_ir_var *var;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
bool progress;
|
||||||
|
|
||||||
ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func;
|
ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func;
|
||||||
|
|
||||||
@ -12783,6 +13009,16 @@ static void process_entry_function(struct hlsl_ctx *ctx,
|
|||||||
hlsl_transform_ir(ctx, lower_separate_samples, body, NULL);
|
hlsl_transform_ir(ctx, lower_separate_samples, body, NULL);
|
||||||
|
|
||||||
hlsl_transform_ir(ctx, validate_dereferences, body, NULL);
|
hlsl_transform_ir(ctx, validate_dereferences, body, NULL);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
progress = vectorize_exprs(ctx, body);
|
||||||
|
compute_liveness(ctx, entry_func);
|
||||||
|
progress |= hlsl_transform_ir(ctx, dce, body, NULL);
|
||||||
|
progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
|
||||||
|
progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
|
||||||
|
} while (progress);
|
||||||
|
|
||||||
hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL);
|
hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL);
|
||||||
|
|
||||||
if (hlsl_version_ge(ctx, 4, 0))
|
if (hlsl_version_ge(ctx, 4, 0))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user