mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-09-12 18:50:22 -07:00
vkd3d-shader/hlsl: Store stream index in struct hlsl_semantic.
In addition, support stream indices in tpf_write_signature().
This commit is contained in:
Notes:
Henri Verbeet
2025-06-02 21:00:38 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1518
@@ -245,6 +245,7 @@ struct hlsl_semantic
|
||||
{
|
||||
const char *name;
|
||||
uint32_t index;
|
||||
uint32_t stream_index;
|
||||
|
||||
/* Name exactly as it appears in the sources. */
|
||||
const char *raw_name;
|
||||
|
@@ -288,7 +288,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls
|
||||
|
||||
static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
|
||||
struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic,
|
||||
uint32_t index, bool output, bool force_align, bool create, const struct vkd3d_shader_location *loc)
|
||||
uint32_t index, uint32_t stream_index, bool output, bool force_align, bool create,
|
||||
const struct vkd3d_shader_location *loc)
|
||||
{
|
||||
struct hlsl_semantic new_semantic;
|
||||
struct hlsl_ir_var *ext_var;
|
||||
@@ -300,7 +301,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
|
||||
else
|
||||
prefix = output ? "output" : "input";
|
||||
|
||||
if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", prefix, semantic->name, index)))
|
||||
if (stream_index)
|
||||
new_name = hlsl_sprintf_alloc(ctx, "<%s-m%u:%s%u>", prefix, stream_index, semantic->name, index);
|
||||
else
|
||||
new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", prefix, semantic->name, index);
|
||||
|
||||
if (!new_name)
|
||||
return NULL;
|
||||
|
||||
LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
|
||||
@@ -353,6 +359,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
|
||||
return NULL;
|
||||
}
|
||||
new_semantic.index = index;
|
||||
new_semantic.stream_index = stream_index;
|
||||
if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL)))
|
||||
{
|
||||
vkd3d_free(new_name);
|
||||
@@ -435,7 +442,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
|
||||
prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK;
|
||||
|
||||
if (!(input = add_semantic_var(ctx, func, var, prim_type_src,
|
||||
modifiers, semantic, semantic_index + i, false, force_align, true, loc)))
|
||||
modifiers, semantic, semantic_index + i, 0, false, force_align, true, loc)))
|
||||
return;
|
||||
hlsl_init_simple_deref_from_var(&prim_deref, input);
|
||||
|
||||
@@ -448,7 +455,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
|
||||
else
|
||||
{
|
||||
if (!(input = add_semantic_var(ctx, func, var, vector_type_src,
|
||||
modifiers, semantic, semantic_index + i, false, force_align, true, loc)))
|
||||
modifiers, semantic, semantic_index + i, 0, false, force_align, true, loc)))
|
||||
return;
|
||||
|
||||
if (!(load = hlsl_new_var_load(ctx, input, &var->loc)))
|
||||
@@ -558,7 +565,7 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function
|
||||
|
||||
static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||
struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers,
|
||||
struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create)
|
||||
struct hlsl_semantic *semantic, uint32_t semantic_index, uint32_t stream_index, bool force_align, bool create)
|
||||
{
|
||||
struct hlsl_type *type = rhs->node.data_type, *vector_type;
|
||||
struct vkd3d_shader_location *loc = &rhs->node.loc;
|
||||
@@ -588,7 +595,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||
struct hlsl_ir_node *load;
|
||||
|
||||
if (!(output = add_semantic_var(ctx, func, var, vector_type,
|
||||
modifiers, semantic, semantic_index + i, true, force_align, create, loc)))
|
||||
modifiers, semantic, semantic_index + i, stream_index, true, force_align, create, loc)))
|
||||
return;
|
||||
|
||||
if (type->class == HLSL_CLASS_MATRIX)
|
||||
@@ -609,7 +616,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||
|
||||
static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||
struct hlsl_ir_function_decl *func, const struct hlsl_type *type, struct hlsl_ir_load *rhs, uint32_t modifiers,
|
||||
struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create)
|
||||
struct hlsl_semantic *semantic, uint32_t semantic_index, uint32_t stream_index, bool force_align, bool create)
|
||||
{
|
||||
struct vkd3d_shader_location *loc = &rhs->node.loc;
|
||||
struct hlsl_ir_var *var = rhs->src.var;
|
||||
@@ -654,12 +661,13 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *
|
||||
hlsl_block_add_instr(block, &element_load->node);
|
||||
|
||||
append_output_copy_recurse(ctx, block, func, element_type, element_load, element_modifiers, semantic,
|
||||
elem_semantic_index, force_align, create);
|
||||
elem_semantic_index, stream_index, force_align, create);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
append_output_copy(ctx, block, func, rhs, modifiers, semantic, semantic_index, force_align, create);
|
||||
append_output_copy(ctx, block, func, rhs, modifiers, semantic,
|
||||
semantic_index, stream_index, force_align, create);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -676,7 +684,7 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function
|
||||
hlsl_block_add_instr(&func->body, &load->node);
|
||||
|
||||
append_output_copy_recurse(ctx, &func->body, func, var->data_type, load, var->storage_modifiers,
|
||||
&var->semantic, var->semantic.index, false, true);
|
||||
&var->semantic, var->semantic.index, 0, false, true);
|
||||
}
|
||||
|
||||
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
|
||||
@@ -3360,7 +3368,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
|
||||
struct stream_append_ctx
|
||||
{
|
||||
struct hlsl_ir_function_decl *func;
|
||||
bool created;
|
||||
bool created[VKD3D_MAX_STREAM_COUNT];
|
||||
};
|
||||
|
||||
static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
|
||||
@@ -3371,6 +3379,7 @@ static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
|
||||
const struct hlsl_type *type;
|
||||
struct hlsl_ir_var *var;
|
||||
struct hlsl_block block;
|
||||
uint32_t stream_index;
|
||||
|
||||
if (instr->type != HLSL_IR_RESOURCE_STORE)
|
||||
return false;
|
||||
@@ -3390,18 +3399,17 @@ static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
|
||||
}
|
||||
|
||||
VKD3D_ASSERT(var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated);
|
||||
stream_index = var->regs[HLSL_REGSET_STREAM_OUTPUTS].index;
|
||||
|
||||
if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index)
|
||||
{
|
||||
hlsl_fixme(ctx, &instr->loc, "Append to an output stream with a nonzero stream index.");
|
||||
return false;
|
||||
}
|
||||
VKD3D_ASSERT(stream_index < ARRAY_SIZE(append_ctx->created));
|
||||
|
||||
hlsl_block_init(&block);
|
||||
|
||||
append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs), var->storage_modifiers,
|
||||
&var->semantic, var->semantic.index, false, !append_ctx->created);
|
||||
append_ctx->created = true;
|
||||
append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs),
|
||||
var->storage_modifiers, &var->semantic, var->semantic.index,
|
||||
var->regs[HLSL_REGSET_STREAM_OUTPUTS].index, false, !append_ctx->created[stream_index]);
|
||||
|
||||
append_ctx->created[stream_index] = true;
|
||||
|
||||
list_move_before(&instr->entry, &block.instrs);
|
||||
hlsl_src_remove(&store->value);
|
||||
@@ -6518,8 +6526,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
|
||||
static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
||||
uint32_t *output_reg_count)
|
||||
{
|
||||
struct register_allocator input_allocator = {0}, output_allocators[VKD3D_MAX_STREAM_COUNT] = {{0}};
|
||||
struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0};
|
||||
struct register_allocator input_allocator = {0}, output_allocator = {0};
|
||||
bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX;
|
||||
bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL;
|
||||
struct hlsl_ir_var *var;
|
||||
@@ -6527,7 +6535,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
|
||||
in_prim_allocator.prioritize_smaller_writemasks = true;
|
||||
patch_constant_out_patch_allocator.prioritize_smaller_writemasks = true;
|
||||
input_allocator.prioritize_smaller_writemasks = true;
|
||||
output_allocator.prioritize_smaller_writemasks = true;
|
||||
for (unsigned int i = 0; i < ARRAY_SIZE(output_allocators); ++i)
|
||||
output_allocators[i].prioritize_smaller_writemasks = true;
|
||||
|
||||
LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
|
||||
{
|
||||
@@ -6550,15 +6559,22 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
|
||||
}
|
||||
|
||||
if (var->is_output_semantic)
|
||||
allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader);
|
||||
{
|
||||
VKD3D_ASSERT(var->semantic.stream_index < ARRAY_SIZE(output_allocators));
|
||||
allocate_semantic_register(ctx, var, &output_allocators[var->semantic.stream_index],
|
||||
true, !is_pixel_shader);
|
||||
}
|
||||
}
|
||||
|
||||
*output_reg_count = output_allocator.reg_count;
|
||||
*output_reg_count = output_allocators[0].reg_count;
|
||||
for (unsigned int i = 1; i < ARRAY_SIZE(output_allocators); ++i)
|
||||
*output_reg_count = max(*output_reg_count, output_allocators[i].reg_count);
|
||||
|
||||
vkd3d_free(in_prim_allocator.allocations);
|
||||
vkd3d_free(patch_constant_out_patch_allocator.allocations);
|
||||
vkd3d_free(input_allocator.allocations);
|
||||
vkd3d_free(output_allocator.allocations);
|
||||
for (unsigned int i = 0; i < ARRAY_SIZE(output_allocators); ++i)
|
||||
vkd3d_free(output_allocators[i].allocations);
|
||||
}
|
||||
|
||||
static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx,
|
||||
@@ -8018,6 +8034,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
|
||||
return;
|
||||
}
|
||||
element->semantic_index = var->semantic.index;
|
||||
element->stream_index = var->semantic.stream_index;
|
||||
element->sysval_semantic = sysval;
|
||||
element->component_type = component_type;
|
||||
element->register_index = register_index;
|
||||
|
@@ -3261,6 +3261,8 @@ static int signature_element_pointer_compare(const void *x, const void *y)
|
||||
const struct signature_element *f = *(const struct signature_element **)y;
|
||||
int ret;
|
||||
|
||||
if ((ret = vkd3d_u32_compare(e->stream_index, f->stream_index)))
|
||||
return ret;
|
||||
if ((ret = vkd3d_u32_compare(e->register_index, f->register_index)))
|
||||
return ret;
|
||||
return vkd3d_u32_compare(e->mask, f->mask);
|
||||
@@ -3269,12 +3271,17 @@ static int signature_element_pointer_compare(const void *x, const void *y)
|
||||
static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag)
|
||||
{
|
||||
bool has_minimum_precision = tpf->program->global_flags & VKD3DSGF_ENABLE_MINIMUM_PRECISION;
|
||||
bool output = tag == TAG_OSGN || (tag == TAG_PCSG
|
||||
&& tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL);
|
||||
const struct vkd3d_shader_version *version = &tpf->program->shader_version;
|
||||
const struct signature_element **sorted_elements;
|
||||
struct vkd3d_bytecode_buffer buffer = {0};
|
||||
bool has_stream_index, output;
|
||||
unsigned int i;
|
||||
|
||||
output = tag == TAG_OSGN || (tag == TAG_PCSG && version->type == VKD3D_SHADER_TYPE_HULL);
|
||||
if (output && version->type == VKD3D_SHADER_TYPE_GEOMETRY && version->major >= 5)
|
||||
tag = TAG_OSG5;
|
||||
has_stream_index = tag == TAG_OSG5 || has_minimum_precision;
|
||||
|
||||
put_u32(&buffer, signature->element_count);
|
||||
put_u32(&buffer, 8); /* unknown */
|
||||
|
||||
@@ -3297,8 +3304,8 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si
|
||||
if (sysval >= VKD3D_SHADER_SV_TARGET)
|
||||
sysval = VKD3D_SHADER_SV_NONE;
|
||||
|
||||
if (has_minimum_precision)
|
||||
put_u32(&buffer, 0); /* FIXME: stream index */
|
||||
if (has_stream_index)
|
||||
put_u32(&buffer, element->stream_index);
|
||||
put_u32(&buffer, 0); /* name */
|
||||
put_u32(&buffer, element->semantic_index);
|
||||
put_u32(&buffer, sysval);
|
||||
@@ -3312,13 +3319,16 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si
|
||||
for (i = 0; i < signature->element_count; ++i)
|
||||
{
|
||||
const struct signature_element *element = sorted_elements[i];
|
||||
size_t name_index = 2 + i * 6;
|
||||
size_t string_offset;
|
||||
|
||||
string_offset = put_string(&buffer, element->semantic_name);
|
||||
if (has_stream_index)
|
||||
name_index += i + 1;
|
||||
if (has_minimum_precision)
|
||||
set_u32(&buffer, (2 + i * 8 + 1) * sizeof(uint32_t), string_offset);
|
||||
else
|
||||
set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset);
|
||||
name_index += i;
|
||||
|
||||
string_offset = put_string(&buffer, element->semantic_name);
|
||||
set_u32(&buffer, name_index * sizeof(uint32_t), string_offset);
|
||||
}
|
||||
|
||||
if (has_minimum_precision)
|
||||
|
@@ -233,7 +233,7 @@ void main(InputPatch<gs_data, 3> data, InputPatch<gs_data2, 3> data2, InputPatch
|
||||
{
|
||||
}
|
||||
|
||||
[geometry shader fail todo(sm>=5)]
|
||||
[geometry shader fail]
|
||||
struct gs_data
|
||||
{
|
||||
float4 pos : SV_POSITION;
|
||||
@@ -254,7 +254,7 @@ void main(InputPatch<gs_data, 3> data, InputPatch<gs_data2, 4> data2, InputPatch
|
||||
{
|
||||
}
|
||||
|
||||
[geometry shader fail todo(sm>=5)]
|
||||
[geometry shader fail]
|
||||
struct gs_data
|
||||
{
|
||||
float4 pos : SV_POSITION;
|
||||
@@ -691,7 +691,7 @@ void main(point gs_data vin[1], inout PointStream<gs_data> vout)
|
||||
|
||||
% The total component count of all outputs is used. If the allocated registers overlap,
|
||||
% the largest allocation size is used for the calculation.
|
||||
[geometry shader fail(sm<5) todo(sm>=5)]
|
||||
[geometry shader fail(sm<5)]
|
||||
struct gs_data
|
||||
{
|
||||
float2 x : X; // Allocates to o0.xy
|
||||
@@ -712,7 +712,7 @@ void main(point gs_data vin[1], inout PointStream<gs_data> vout1, inout PointStr
|
||||
vout2.Append((gs_data2)0);
|
||||
}
|
||||
|
||||
[geometry shader fail todo(sm>=5)]
|
||||
[geometry shader fail]
|
||||
struct gs_data
|
||||
{
|
||||
float2 x : X; // Allocates to o0.xy
|
||||
|
@@ -1363,35 +1363,6 @@ static void test_emit_signature(void)
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
};
|
||||
static const uint32_t dxbc_gs[] =
|
||||
{
|
||||
0x43425844, 0xe27a1766, 0x53623db9, 0x85065db9, 0x79f2e81e, 0x00000001, 0x00000324, 0x00000005,
|
||||
0x00000034, 0x000000ac, 0x000000dc, 0x00000174, 0x00000288, 0x46454452, 0x00000070, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x0000003c, 0x47530500, 0x00000100, 0x0000003c, 0x31314452, 0x0000003c,
|
||||
0x00000018, 0x00000020, 0x00000028, 0x00000024, 0x0000000c, 0x00000000, 0x7263694d, 0x666f736f,
|
||||
0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x2e302e30,
|
||||
0x31303031, 0x36312e31, 0x00343833, 0x4e475349, 0x00000028, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0x7a6d6573, 0xababab00, 0x3547534f,
|
||||
0x00000090, 0x00000004, 0x00000008, 0x00000000, 0x00000078, 0x00000000, 0x00000001, 0x00000003,
|
||||
0x00000000, 0x0000000f, 0x00000000, 0x00000084, 0x00000000, 0x00000000, 0x00000003, 0x00000001,
|
||||
0x0000000f, 0x00000001, 0x00000078, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x00000001, 0x00000089, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, 0x505f5653,
|
||||
0x5449534f, 0x004e4f49, 0x786d6573, 0x6d657300, 0xabab0079, 0x58454853, 0x0000010c, 0x00020050,
|
||||
0x00000043, 0x0100086a, 0x0400005f, 0x002010f2, 0x00000003, 0x00000000, 0x0100185d, 0x0300008f,
|
||||
0x00110000, 0x00000000, 0x0100085c, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065,
|
||||
0x001020f2, 0x00000001, 0x0300008f, 0x00110000, 0x00000001, 0x0100085c, 0x04000067, 0x001020f2,
|
||||
0x00000000, 0x00000001, 0x03000065, 0x001020f2, 0x00000001, 0x0200005e, 0x0000000c, 0x08000036,
|
||||
0x001020f2, 0x00000000, 0x00004002, 0x3f800000, 0x40000000, 0x40400000, 0x40800000, 0x06000036,
|
||||
0x001020f2, 0x00000001, 0x00201e46, 0x00000000, 0x00000000, 0x03000075, 0x00110000, 0x00000000,
|
||||
0x08000036, 0x001020f2, 0x00000000, 0x00004002, 0x41300000, 0x41400000, 0x41500000, 0x41600000,
|
||||
0x06000036, 0x001020f2, 0x00000001, 0x00201e46, 0x00000001, 0x00000000, 0x03000075, 0x00110000,
|
||||
0x00000001, 0x0100003e, 0x54415453, 0x00000094, 0x00000007, 0x00000000, 0x00000000, 0x00000005,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x0000000c, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000,
|
||||
};
|
||||
|
||||
static const struct emit_signature_test
|
||||
{
|
||||
@@ -1637,7 +1608,7 @@ static void test_emit_signature(void)
|
||||
},
|
||||
{
|
||||
"gs_5_0",
|
||||
{dxbc_gs, sizeof(dxbc_gs)},
|
||||
{NULL, 0},
|
||||
"struct input_data\n"
|
||||
"{\n"
|
||||
" float4 z : semz;\n"
|
||||
|
Reference in New Issue
Block a user