vkd3d-shader/hlsl: Allow to force alignment on some semantic vars.

This is required to properly optimize signatures, because these
semantics must be aligned while being packed:
- Array elements.
- The first fields of structs.
- Major vectors of a matrix.

For now this has no effect since semantics are allocated with reg_size
4, but it will take effect when optimizing inter-stage signatures.
This commit is contained in:
Francisco Casas 2024-10-18 18:31:38 -03:00 committed by Henri Verbeet
parent 7fd6c29ee8
commit d562b03c43
Notes: Henri Verbeet 2024-10-22 20:54:15 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1193
2 changed files with 44 additions and 23 deletions

View File

@ -516,6 +516,9 @@ struct hlsl_ir_var
/* Whether the shader performs dereferences with non-constant offsets in the variable. */ /* Whether the shader performs dereferences with non-constant offsets in the variable. */
bool indexable; bool indexable;
/* Whether this is a semantic variable that was split from an array, or is the first
* element of a struct, and thus needs to be aligned when packed in the signature. */
bool force_align;
uint32_t is_input_semantic : 1; uint32_t is_input_semantic : 1;
uint32_t is_output_semantic : 1; uint32_t is_output_semantic : 1;

View File

@ -278,7 +278,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls
static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic,
uint32_t index, bool output, const struct vkd3d_shader_location *loc) uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc)
{ {
struct hlsl_semantic new_semantic; struct hlsl_semantic new_semantic;
struct hlsl_ir_var *ext_var; struct hlsl_ir_var *ext_var;
@ -338,6 +338,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
else else
ext_var->is_input_semantic = 1; ext_var->is_input_semantic = 1;
ext_var->is_param = var->is_param; ext_var->is_param = var->is_param;
ext_var->force_align = force_align;
list_add_before(&var->scope_entry, &ext_var->scope_entry); list_add_before(&var->scope_entry, &ext_var->scope_entry);
list_add_tail(&func->extern_vars, &ext_var->extern_entry); list_add_tail(&func->extern_vars, &ext_var->extern_entry);
@ -362,7 +363,7 @@ static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t fie
} }
static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs,
uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
{ {
struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst;
struct vkd3d_shader_location *loc = &lhs->node.loc; struct vkd3d_shader_location *loc = &lhs->node.loc;
@ -386,14 +387,17 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4);
if (hlsl_type_major_size(type) > 1)
force_align = true;
for (i = 0; i < hlsl_type_major_size(type); ++i) for (i = 0; i < hlsl_type_major_size(type); ++i)
{ {
struct hlsl_ir_node *store, *cast; struct hlsl_ir_node *store, *cast;
struct hlsl_ir_var *input; struct hlsl_ir_var *input;
struct hlsl_ir_load *load; struct hlsl_ir_load *load;
if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, if (!(input = add_semantic_var(ctx, func, var, vector_type_src,
semantic_index + i, false, loc))) modifiers, semantic, semantic_index + i, false, force_align, loc)))
return; return;
if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) if (!(load = hlsl_new_var_load(ctx, input, &var->loc)))
@ -425,8 +429,9 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
} }
} }
static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, static void prepend_input_copy_recurse(struct hlsl_ctx *ctx,
struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers,
struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
{ {
struct vkd3d_shader_location *loc = &lhs->node.loc; struct vkd3d_shader_location *loc = &lhs->node.loc;
struct hlsl_type *type = lhs->node.data_type; struct hlsl_type *type = lhs->node.data_type;
@ -449,6 +454,7 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func
elem_semantic_index = semantic_index elem_semantic_index = semantic_index
+ i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
element_modifiers = modifiers; element_modifiers = modifiers;
force_align = true;
} }
else else
{ {
@ -463,6 +469,7 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func
elem_semantic_index = semantic->index; elem_semantic_index = semantic->index;
loc = &field->loc; loc = &field->loc;
element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
force_align = (i == 0);
} }
if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc)))
@ -474,12 +481,13 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func
return; return;
list_add_after(&c->entry, &element_load->node.entry); list_add_after(&c->entry, &element_load->node.entry);
prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); prepend_input_copy_recurse(ctx, func, element_load, element_modifiers,
semantic, elem_semantic_index, force_align);
} }
} }
else else
{ {
prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align);
} }
} }
@ -494,11 +502,12 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function
return; return;
list_add_head(&func->body.instrs, &load->node.entry); list_add_head(&func->body.instrs, &load->node.entry);
prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false);
} }
static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) struct hlsl_ir_load *rhs, uint32_t modifiers,
struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
{ {
struct hlsl_type *type = rhs->node.data_type, *vector_type; struct hlsl_type *type = rhs->node.data_type, *vector_type;
struct vkd3d_shader_location *loc = &rhs->node.loc; struct vkd3d_shader_location *loc = &rhs->node.loc;
@ -519,6 +528,9 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type));
if (hlsl_type_major_size(type) > 1)
force_align = true;
for (i = 0; i < hlsl_type_major_size(type); ++i) for (i = 0; i < hlsl_type_major_size(type); ++i)
{ {
struct hlsl_ir_node *store; struct hlsl_ir_node *store;
@ -526,7 +538,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
struct hlsl_ir_load *load; struct hlsl_ir_load *load;
if (!(output = add_semantic_var(ctx, func, var, vector_type, if (!(output = add_semantic_var(ctx, func, var, vector_type,
modifiers, semantic, semantic_index + i, true, loc))) modifiers, semantic, semantic_index + i, true, force_align, loc)))
return; return;
if (type->class == HLSL_CLASS_MATRIX) if (type->class == HLSL_CLASS_MATRIX)
@ -554,8 +566,9 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
} }
} }
static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, static void append_output_copy_recurse(struct hlsl_ctx *ctx,
struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers,
struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
{ {
struct vkd3d_shader_location *loc = &rhs->node.loc; struct vkd3d_shader_location *loc = &rhs->node.loc;
struct hlsl_type *type = rhs->node.data_type; struct hlsl_type *type = rhs->node.data_type;
@ -578,6 +591,7 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func
elem_semantic_index = semantic_index elem_semantic_index = semantic_index
+ i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
element_modifiers = modifiers; element_modifiers = modifiers;
force_align = true;
} }
else else
{ {
@ -589,6 +603,7 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func
elem_semantic_index = semantic->index; elem_semantic_index = semantic->index;
loc = &field->loc; loc = &field->loc;
element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
force_align = (i == 0);
} }
if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc)))
@ -599,12 +614,13 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func
return; return;
hlsl_block_add_instr(&func->body, &element_load->node); hlsl_block_add_instr(&func->body, &element_load->node);
append_output_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); append_output_copy_recurse(ctx, func, element_load, element_modifiers,
semantic, elem_semantic_index, force_align);
} }
} }
else else
{ {
append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align);
} }
} }
@ -620,7 +636,7 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function
return; return;
hlsl_block_add_instr(&func->body, &load->node); hlsl_block_add_instr(&func->body, &load->node);
append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false);
} }
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
@ -4496,10 +4512,11 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a
/* reg_size is the number of register components to be reserved, while component_count is the number /* reg_size is the number of register components to be reserved, while component_count is the number
* of components for the register's writemask. In SM1, floats and vectors allocate the whole * of components for the register's writemask. In SM1, floats and vectors allocate the whole
* register, even if they don't use it completely. */ * register, even if they don't use it completely. */
static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator,
struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int first_write, unsigned int last_read, unsigned int reg_size,
unsigned int reg_size, unsigned int component_count, int mode) unsigned int component_count, int mode, bool force_align)
{ {
unsigned int required_size = force_align ? 4 : reg_size;
struct hlsl_reg ret = {0}; struct hlsl_reg ret = {0};
unsigned int writemask; unsigned int writemask;
uint32_t reg_idx; uint32_t reg_idx;
@ -4510,7 +4527,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx,
{ {
writemask = get_available_writemask(allocator, first_write, last_read, reg_idx, mode); writemask = get_available_writemask(allocator, first_write, last_read, reg_idx, mode);
if (vkd3d_popcount(writemask) >= reg_size) if (vkd3d_popcount(writemask) >= required_size)
{ {
writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1);
break; break;
@ -4603,7 +4620,7 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx,
/* FIXME: We could potentially pack structs or arrays more efficiently... */ /* FIXME: We could potentially pack structs or arrays more efficiently... */
if (type->class <= HLSL_CLASS_VECTOR) if (type->class <= HLSL_CLASS_VECTOR)
return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0); return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false);
else else
return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0);
} }
@ -5292,7 +5309,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
int mode = (ctx->profile->major_version < 4) int mode = (ctx->profile->major_version < 4)
? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, 4, var->data_type->dimx, mode); var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1,
UINT_MAX, 4, var->data_type->dimx, mode, var->force_align);
TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v',
var->regs[HLSL_REGSET_NUMERIC], var->data_type)); var->regs[HLSL_REGSET_NUMERIC], var->data_type));