From d5dcf311239f35088a5440e0a1794ce066cb3198 Mon Sep 17 00:00:00 2001 From: Shaun Ren Date: Thu, 20 Mar 2025 21:28:02 -0400 Subject: [PATCH] vkd3d-shader/hlsl: Store stream index in struct hlsl_semantic. In addition, support stream indices in tpf_write_signature(). --- libs/vkd3d-shader/hlsl.h | 1 + libs/vkd3d-shader/hlsl_codegen.c | 65 ++++++++++++------- libs/vkd3d-shader/tpf.c | 26 +++++--- tests/hlsl/geometry-shader-syntax.shader_test | 8 +-- tests/vkd3d_shader_api.c | 31 +-------- 5 files changed, 65 insertions(+), 66 deletions(-) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 9af4b9894..8b1f3ec3d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -245,6 +245,7 @@ struct hlsl_semantic { const char *name; uint32_t index; + uint32_t stream_index; /* Name exactly as it appears in the sources. */ const char *raw_name; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index ef412c9d8..9b2d67b58 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -288,7 +288,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, - uint32_t index, bool output, bool force_align, bool create, const struct vkd3d_shader_location *loc) + uint32_t index, uint32_t stream_index, bool output, bool force_align, bool create, + const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; struct hlsl_ir_var *ext_var; @@ -300,7 +301,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir else prefix = output ? "output" : "input"; - if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", prefix, semantic->name, index))) + if (stream_index) + new_name = hlsl_sprintf_alloc(ctx, "<%s-m%u:%s%u>", prefix, stream_index, semantic->name, index); + else + new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", prefix, semantic->name, index); + + if (!new_name) return NULL; LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) @@ -353,6 +359,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir return NULL; } new_semantic.index = index; + new_semantic.stream_index = stream_index; if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL))) { vkd3d_free(new_name); @@ -435,7 +442,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK; if (!(input = add_semantic_var(ctx, func, var, prim_type_src, - modifiers, semantic, semantic_index + i, false, force_align, true, loc))) + modifiers, semantic, semantic_index + i, 0, false, force_align, true, loc))) return; hlsl_init_simple_deref_from_var(&prim_deref, input); @@ -448,7 +455,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec else { if (!(input = add_semantic_var(ctx, func, var, vector_type_src, - modifiers, semantic, semantic_index + i, false, force_align, true, loc))) + modifiers, semantic, semantic_index + i, 0, false, force_align, true, loc))) return; if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) @@ -558,7 +565,7 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, - struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create) + struct hlsl_semantic *semantic, uint32_t semantic_index, uint32_t stream_index, bool force_align, bool create) { struct hlsl_type *type = rhs->node.data_type, *vector_type; struct vkd3d_shader_location *loc = &rhs->node.loc; @@ -588,7 +595,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *load; if (!(output = add_semantic_var(ctx, func, var, vector_type, - modifiers, semantic, semantic_index + i, true, force_align, create, loc))) + modifiers, semantic, semantic_index + i, stream_index, true, force_align, create, loc))) return; if (type->class == HLSL_CLASS_MATRIX) @@ -609,7 +616,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_function_decl *func, const struct hlsl_type *type, struct hlsl_ir_load *rhs, uint32_t modifiers, - struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create) + struct hlsl_semantic *semantic, uint32_t semantic_index, uint32_t stream_index, bool force_align, bool create) { struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_ir_var *var = rhs->src.var; @@ -654,12 +661,13 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * hlsl_block_add_instr(block, &element_load->node); append_output_copy_recurse(ctx, block, func, element_type, element_load, element_modifiers, semantic, - elem_semantic_index, force_align, create); + elem_semantic_index, stream_index, force_align, create); } } else { - append_output_copy(ctx, block, func, rhs, modifiers, semantic, semantic_index, force_align, create); + append_output_copy(ctx, block, func, rhs, modifiers, semantic, + semantic_index, stream_index, force_align, create); } } @@ -676,7 +684,7 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function hlsl_block_add_instr(&func->body, &load->node); append_output_copy_recurse(ctx, &func->body, func, var->data_type, load, var->storage_modifiers, - &var->semantic, var->semantic.index, false, true); + &var->semantic, var->semantic.index, 0, false, true); } bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -3360,7 +3368,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct stream_append_ctx { struct hlsl_ir_function_decl *func; - bool created; + bool created[VKD3D_MAX_STREAM_COUNT]; }; static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -3371,6 +3379,7 @@ static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst const struct hlsl_type *type; struct hlsl_ir_var *var; struct hlsl_block block; + uint32_t stream_index; if (instr->type != HLSL_IR_RESOURCE_STORE) return false; @@ -3390,18 +3399,17 @@ static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst } VKD3D_ASSERT(var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated); + stream_index = var->regs[HLSL_REGSET_STREAM_OUTPUTS].index; - if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index) - { - hlsl_fixme(ctx, &instr->loc, "Append to an output stream with a nonzero stream index."); - return false; - } + VKD3D_ASSERT(stream_index < ARRAY_SIZE(append_ctx->created)); hlsl_block_init(&block); - append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs), var->storage_modifiers, - &var->semantic, var->semantic.index, false, !append_ctx->created); - append_ctx->created = true; + append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs), + var->storage_modifiers, &var->semantic, var->semantic.index, + var->regs[HLSL_REGSET_STREAM_OUTPUTS].index, false, !append_ctx->created[stream_index]); + + append_ctx->created[stream_index] = true; list_move_before(&instr->entry, &block.instrs); hlsl_src_remove(&store->value); @@ -6518,8 +6526,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, uint32_t *output_reg_count) { + struct register_allocator input_allocator = {0}, output_allocators[VKD3D_MAX_STREAM_COUNT] = {{0}}; struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0}; - struct register_allocator input_allocator = {0}, output_allocator = {0}; bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; struct hlsl_ir_var *var; @@ -6527,7 +6535,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun in_prim_allocator.prioritize_smaller_writemasks = true; patch_constant_out_patch_allocator.prioritize_smaller_writemasks = true; input_allocator.prioritize_smaller_writemasks = true; - output_allocator.prioritize_smaller_writemasks = true; + for (unsigned int i = 0; i < ARRAY_SIZE(output_allocators); ++i) + output_allocators[i].prioritize_smaller_writemasks = true; LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -6550,15 +6559,22 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun } if (var->is_output_semantic) - allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader); + { + VKD3D_ASSERT(var->semantic.stream_index < ARRAY_SIZE(output_allocators)); + allocate_semantic_register(ctx, var, &output_allocators[var->semantic.stream_index], + true, !is_pixel_shader); + } } - *output_reg_count = output_allocator.reg_count; + *output_reg_count = output_allocators[0].reg_count; + for (unsigned int i = 1; i < ARRAY_SIZE(output_allocators); ++i) + *output_reg_count = max(*output_reg_count, output_allocators[i].reg_count); vkd3d_free(in_prim_allocator.allocations); vkd3d_free(patch_constant_out_patch_allocator.allocations); vkd3d_free(input_allocator.allocations); - vkd3d_free(output_allocator.allocations); + for (unsigned int i = 0; i < ARRAY_SIZE(output_allocators); ++i) + vkd3d_free(output_allocators[i].allocations); } static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, @@ -8018,6 +8034,7 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog return; } element->semantic_index = var->semantic.index; + element->stream_index = var->semantic.stream_index; element->sysval_semantic = sysval; element->component_type = component_type; element->register_index = register_index; diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 19ca97cb4..08bdc3e64 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3261,6 +3261,8 @@ static int signature_element_pointer_compare(const void *x, const void *y) const struct signature_element *f = *(const struct signature_element **)y; int ret; + if ((ret = vkd3d_u32_compare(e->stream_index, f->stream_index))) + return ret; if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) return ret; return vkd3d_u32_compare(e->mask, f->mask); @@ -3269,12 +3271,17 @@ static int signature_element_pointer_compare(const void *x, const void *y) static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) { bool has_minimum_precision = tpf->program->global_flags & VKD3DSGF_ENABLE_MINIMUM_PRECISION; - bool output = tag == TAG_OSGN || (tag == TAG_PCSG - && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const struct signature_element **sorted_elements; struct vkd3d_bytecode_buffer buffer = {0}; + bool has_stream_index, output; unsigned int i; + output = tag == TAG_OSGN || (tag == TAG_PCSG && version->type == VKD3D_SHADER_TYPE_HULL); + if (output && version->type == VKD3D_SHADER_TYPE_GEOMETRY && version->major >= 5) + tag = TAG_OSG5; + has_stream_index = tag == TAG_OSG5 || has_minimum_precision; + put_u32(&buffer, signature->element_count); put_u32(&buffer, 8); /* unknown */ @@ -3297,8 +3304,8 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si if (sysval >= VKD3D_SHADER_SV_TARGET) sysval = VKD3D_SHADER_SV_NONE; - if (has_minimum_precision) - put_u32(&buffer, 0); /* FIXME: stream index */ + if (has_stream_index) + put_u32(&buffer, element->stream_index); put_u32(&buffer, 0); /* name */ put_u32(&buffer, element->semantic_index); put_u32(&buffer, sysval); @@ -3312,13 +3319,16 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si for (i = 0; i < signature->element_count; ++i) { const struct signature_element *element = sorted_elements[i]; + size_t name_index = 2 + i * 6; size_t string_offset; - string_offset = put_string(&buffer, element->semantic_name); + if (has_stream_index) + name_index += i + 1; if (has_minimum_precision) - set_u32(&buffer, (2 + i * 8 + 1) * sizeof(uint32_t), string_offset); - else - set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); + name_index += i; + + string_offset = put_string(&buffer, element->semantic_name); + set_u32(&buffer, name_index * sizeof(uint32_t), string_offset); } if (has_minimum_precision) diff --git a/tests/hlsl/geometry-shader-syntax.shader_test b/tests/hlsl/geometry-shader-syntax.shader_test index 6fa833011..3d63cdaa3 100644 --- a/tests/hlsl/geometry-shader-syntax.shader_test +++ b/tests/hlsl/geometry-shader-syntax.shader_test @@ -233,7 +233,7 @@ void main(InputPatch data, InputPatch data2, InputPatch { } -[geometry shader fail todo(sm>=5)] +[geometry shader fail] struct gs_data { float4 pos : SV_POSITION; @@ -254,7 +254,7 @@ void main(InputPatch data, InputPatch data2, InputPatch { } -[geometry shader fail todo(sm>=5)] +[geometry shader fail] struct gs_data { float4 pos : SV_POSITION; @@ -691,7 +691,7 @@ void main(point gs_data vin[1], inout PointStream vout) % The total component count of all outputs are used. If the allocated registers overlap, % the largest allocation size is used for the calculation. -[geometry shader fail(sm<5) todo(sm>=5)] +[geometry shader fail(sm<5)] struct gs_data { float2 x : X; // Allocates to o0.xy @@ -712,7 +712,7 @@ void main(point gs_data vin[1], inout PointStream vout1, inout PointStr vout2.Append((gs_data2)0); } -[geometry shader fail todo(sm>=5)] +[geometry shader fail] struct gs_data { float2 x : X; // Allocates to o0.xy diff --git a/tests/vkd3d_shader_api.c b/tests/vkd3d_shader_api.c index 1575a2564..b379c34d6 100644 --- a/tests/vkd3d_shader_api.c +++ b/tests/vkd3d_shader_api.c @@ -1363,35 +1363,6 @@ static void test_emit_signature(void) 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; - static const uint32_t dxbc_gs[] = - { - 0x43425844, 0xe27a1766, 0x53623db9, 0x85065db9, 0x79f2e81e, 0x00000001, 0x00000324, 0x00000005, - 0x00000034, 0x000000ac, 0x000000dc, 0x00000174, 0x00000288, 0x46454452, 0x00000070, 0x00000000, - 0x00000000, 0x00000000, 0x0000003c, 0x47530500, 0x00000100, 0x0000003c, 0x31314452, 0x0000003c, - 0x00000018, 0x00000020, 0x00000028, 0x00000024, 0x0000000c, 0x00000000, 0x7263694d, 0x666f736f, - 0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x2e302e30, - 0x31303031, 0x36312e31, 0x00343833, 0x4e475349, 0x00000028, 0x00000001, 0x00000008, 0x00000020, - 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0x7a6d6573, 0xababab00, 0x3547534f, - 0x00000090, 0x00000004, 0x00000008, 0x00000000, 0x00000078, 0x00000000, 0x00000001, 0x00000003, - 0x00000000, 0x0000000f, 0x00000000, 0x00000084, 0x00000000, 0x00000000, 0x00000003, 0x00000001, - 0x0000000f, 0x00000001, 0x00000078, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, - 0x00000001, 0x00000089, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, 0x505f5653, - 0x5449534f, 0x004e4f49, 0x786d6573, 0x6d657300, 0xabab0079, 0x58454853, 0x0000010c, 0x00020050, - 0x00000043, 0x0100086a, 0x0400005f, 0x002010f2, 0x00000003, 0x00000000, 0x0100185d, 0x0300008f, - 0x00110000, 0x00000000, 0x0100085c, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, - 0x001020f2, 0x00000001, 0x0300008f, 0x00110000, 0x00000001, 0x0100085c, 0x04000067, 0x001020f2, - 0x00000000, 0x00000001, 0x03000065, 0x001020f2, 0x00000001, 0x0200005e, 0x0000000c, 0x08000036, - 0x001020f2, 0x00000000, 0x00004002, 0x3f800000, 0x40000000, 0x40400000, 0x40800000, 0x06000036, - 0x001020f2, 0x00000001, 0x00201e46, 0x00000000, 0x00000000, 0x03000075, 0x00110000, 0x00000000, - 0x08000036, 0x001020f2, 0x00000000, 0x00004002, 0x41300000, 0x41400000, 0x41500000, 0x41600000, - 0x06000036, 0x001020f2, 0x00000001, 0x00201e46, 0x00000001, 0x00000000, 0x03000075, 0x00110000, - 0x00000001, 0x0100003e, 0x54415453, 0x00000094, 0x00000007, 0x00000000, 0x00000000, 0x00000005, - 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x0000000c, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, - }; static const struct emit_signature_test { @@ -1637,7 +1608,7 @@ static void test_emit_signature(void) }, { "gs_5_0", - {dxbc_gs, sizeof(dxbc_gs)}, + {NULL, 0}, "struct input_data\n" "{\n" " float4 z : semz;\n"