From 4cffbfb94f89317c6e5ce5f7c6aa488e67a8451d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 25 Oct 2024 07:38:01 +1100 Subject: [PATCH] Updated vkd3d to 5eff8bf9188c401cc31ce14d42798dc3751377bd. --- libs/vkd3d/libs/vkd3d-shader/glsl.c | 112 ++++++++++++++----- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 29 +++++ libs/vkd3d/libs/vkd3d-shader/hlsl.h | 24 +++- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2 + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 118 +++++++++++++++++++- libs/vkd3d/libs/vkd3d-shader/tpf.c | 79 ++++++------- 6 files changed, 282 insertions(+), 82 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index a2a090e1c21..363054cb6d9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -20,8 +20,14 @@ struct glsl_resource_type_info { + /* The number of coordinates needed to sample the resource type. */ size_t coord_size; + /* Whether the resource type is an array type. */ + bool array; + /* Whether the resource type has a shadow/comparison variant. */ bool shadow; + /* The type suffix for resource type. I.e., the "2D" part of "usampler2D" + * or "iimage2D". */ const char *type_suffix; }; @@ -102,17 +108,17 @@ static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info( { static const struct glsl_resource_type_info info[] = { - {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ - {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ - {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ - {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ - {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ - {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ - {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ - {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ - {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ - {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ - {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ + {0, 0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ + {1, 0, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ + {1, 0, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ + {2, 0, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ + {2, 0, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ + {3, 0, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ + {3, 0, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ + {2, 1, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ + {3, 1, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ + {3, 1, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ + {4, 1, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ }; if (!t || t >= ARRAY_SIZE(info)) @@ -862,17 +868,24 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { + bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow; const struct glsl_resource_type_info *resource_type_info; unsigned int resource_id, resource_idx, resource_space; unsigned int sampler_id, sampler_idx, sampler_space; const struct vkd3d_shader_descriptor_info1 *d; enum vkd3d_shader_component_type sampled_type; enum vkd3d_shader_resource_type resource_type; + unsigned int component_idx, coord_size; struct vkd3d_string_buffer *sample; enum vkd3d_data_type data_type; - unsigned int coord_size; struct glsl_dst dst; - bool shadow; + + bias = ins->opcode == VKD3DSIH_SAMPLE_B; + gather = ins->opcode == VKD3DSIH_GATHER4; + grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; + lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; + lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; + shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; if (vkd3d_shader_instruction_has_texel_offset(ins)) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, @@ -904,12 +917,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) { coord_size = resource_type_info->coord_size; + array = resource_type_info->array; } else { vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled resource type %#x.", resource_type); coord_size = 2; + array = false; } sampler_id = ins->src[2].reg.idx[0].offset; @@ -917,17 +932,17 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) { sampler_space = d->register_space; - shadow = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + shadow_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) + if (shadow) { - if (!shadow) + if (!shadow_sampler) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); } else { - if (shadow) + if (shadow_sampler) vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); } @@ -942,26 +957,44 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk glsl_dst_init(&dst, gen, ins, &ins->dst[0]); sample = vkd3d_string_buffer_get(&gen->string_buffers); - if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) + if (gather) + vkd3d_string_buffer_printf(sample, "textureGather("); + else if (grad) + vkd3d_string_buffer_printf(sample, "textureGrad("); + else if (lod) vkd3d_string_buffer_printf(sample, "textureLod("); else vkd3d_string_buffer_printf(sample, "texture("); shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); vkd3d_string_buffer_printf(sample, ", "); - if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) + if (shadow) shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size); else shader_glsl_print_src(sample, gen, &ins->src[0], vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type); - if (ins->opcode == VKD3DSIH_SAMPLE_B) + if (grad) { vkd3d_string_buffer_printf(sample, ", "); - shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); + shader_glsl_print_src(sample, gen, &ins->src[3], + vkd3d_write_mask_from_component_count(coord_size - array), ins->src[3].reg.data_type); + vkd3d_string_buffer_printf(sample, ", "); + shader_glsl_print_src(sample, gen, &ins->src[4], + vkd3d_write_mask_from_component_count(coord_size - array), ins->src[4].reg.data_type); } - else if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) + else if (lod_zero) { vkd3d_string_buffer_printf(sample, ", 0.0"); } + else if (bias || lod) + { + vkd3d_string_buffer_printf(sample, ", "); + shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); + } + if (gather) + { + if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0))) + vkd3d_string_buffer_printf(sample, ", %d", component_idx); + } vkd3d_string_buffer_printf(sample, ")"); shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); @@ -1465,6 +1498,15 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VKD3DSIH_FTOU: shader_glsl_cast(gen, ins, "uint", "uvec"); break; + case VKD3DSIH_GATHER4: + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_GRAD: + case VKD3DSIH_SAMPLE_LOD: + shader_glsl_sample(gen, ins); + break; case VKD3DSIH_GEO: case VKD3DSIH_IGE: shader_glsl_relop(gen, ins, ">=", "greaterThanEqual"); @@ -1482,9 +1524,11 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, break; case VKD3DSIH_IMAX: case VKD3DSIH_MAX: + case VKD3DSIH_UMAX: shader_glsl_intrinsic(gen, ins, "max"); break; case VKD3DSIH_MIN: + case VKD3DSIH_UMIN: shader_glsl_intrinsic(gen, ins, "min"); break; case VKD3DSIH_IMUL: @@ -1553,12 +1597,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, case VKD3DSIH_RSQ: shader_glsl_intrinsic(gen, ins, "inversesqrt"); break; - case VKD3DSIH_SAMPLE: - case VKD3DSIH_SAMPLE_B: - case VKD3DSIH_SAMPLE_C: - case VKD3DSIH_SAMPLE_C_LZ: - shader_glsl_sample(gen, ins); - break; case VKD3DSIH_SQRT: shader_glsl_intrinsic(gen, ins, "sqrt"); break; @@ -2197,6 +2235,20 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator } } +static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer *buffer, + struct vkd3d_glsl_generator *gen, enum vsir_global_flags flags) +{ + if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) + { + vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n"); + flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; + } + + if (flags) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags); +} + static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) { const struct vsir_program *program = gen->program; @@ -2210,9 +2262,7 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) group_size->x, group_size->y, group_size->z); } - if (program->global_flags) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)program->global_flags); + shader_glsl_handle_global_flags(buffer, gen, program->global_flags); shader_glsl_generate_descriptor_declarations(gen); shader_glsl_generate_input_declarations(gen); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index c7aa148ea11..cafff2fa878 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -1695,6 +1695,22 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * return &s->node; } +struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, + struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_vsir_instruction_ref *vsir_instr; + + if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) + return NULL; + init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); + vsir_instr->vsir_instr_idx = vsir_instr_idx; + + if (reg) + vsir_instr->node.reg = *reg; + + return &vsir_instr->node; +} + struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { @@ -2517,6 +2533,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); + + case HLSL_IR_VSIR_INSTRUCTION_REF: + vkd3d_unreachable(); } vkd3d_unreachable(); @@ -2938,6 +2957,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", + [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", }; if (type >= ARRAY_SIZE(names)) @@ -3531,6 +3551,11 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; + + case HLSL_IR_VSIR_INSTRUCTION_REF: + vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", + hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); + break; } } @@ -3839,6 +3864,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; + + case HLSL_IR_VSIR_INSTRUCTION_REF: + vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); + break; } } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index e234cd0ba40..ae7f8c1c04f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -328,6 +328,8 @@ enum hlsl_ir_node_type HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, + + HLSL_IR_VSIR_INSTRUCTION_REF, }; /* Common data for every type of IR instruction node. */ @@ -930,6 +932,16 @@ struct hlsl_ir_stateblock_constant char *name; }; +/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. + * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ +struct hlsl_ir_vsir_instruction_ref +{ + struct hlsl_ir_node node; + + /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ + unsigned int vsir_instr_idx; +}; + struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. */ @@ -1245,6 +1257,12 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); } +static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); + return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); +} + static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -1433,9 +1451,6 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); -uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); @@ -1570,6 +1585,9 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, struct list *cases, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, + struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); + void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 49cff4c81b8..cd938fd5906 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -672,6 +672,8 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; + case HLSL_IR_VSIR_INSTRUCTION_REF: + vkd3d_unreachable(); } } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 6e1b2b437b0..2cb56d6b493 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -4162,6 +4162,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); + case HLSL_IR_VSIR_INSTRUCTION_REF: + /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ + vkd3d_unreachable(); } return false; @@ -4193,7 +4196,7 @@ static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, return true; } -void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct hlsl_scope *scope; struct hlsl_ir_var *var; @@ -4301,6 +4304,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ vkd3d_unreachable(); + case HLSL_IR_VSIR_INSTRUCTION_REF: + /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ + vkd3d_unreachable(); case HLSL_IR_STORE: { @@ -4441,7 +4447,7 @@ static void init_var_liveness(struct hlsl_ir_var *var) var->last_read = UINT_MAX; } -void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct hlsl_scope *scope; struct hlsl_ir_var *var; @@ -5222,7 +5228,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * index to all (simultaneously live) variables or intermediate values. Agnostic * as to how many registers are actually available for the current backend, and * does not handle constants. */ -uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator = {0}; struct hlsl_scope *scope; @@ -7536,10 +7542,101 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); } +static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) +{ + struct vkd3d_shader_location *loc; + struct hlsl_ir_node *vsir_instr; + + loc = &program->instructions.elements[program->instructions.count - 1].location; + + if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + hlsl_block_add_instr(block, vsir_instr); +} + +static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, + uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) +{ + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0))) + return; + + ins->declaration.count = temp_count; + + add_last_vsir_instr_to_block(ctx, program, block); +} + +static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_block *block, uint32_t idx, + uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc) +{ + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0))) + return; + + ins->declaration.indexable_temp.register_idx = idx; + ins->declaration.indexable_temp.register_size = size; + ins->declaration.indexable_temp.alignment = 0; + ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; + ins->declaration.indexable_temp.component_count = comp_count; + ins->declaration.indexable_temp.has_function_scope = false; + + add_last_vsir_instr_to_block(ctx, program, block); +} + +static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) +{ + struct hlsl_block block = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + uint32_t temp_count; + + compute_liveness(ctx, func); + mark_indexable_vars(ctx, func); + temp_count = allocate_temp_registers(ctx, func); + if (ctx->result) + return; + program->temp_count = max(program->temp_count, temp_count); + + hlsl_block_init(&block); + + if (temp_count) + sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + continue; + + if (var->indexable) + { + unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; + unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + + sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); + } + } + } + + list_move_head(&func->body.instrs, &block.instrs); + + hlsl_block_cleanup(&block); +} + /* OBJECTIVE: Translate all the information from ctx and entry_func to the * vsir_program, so it can be used as input to tpf_compile() without relying * on ctx and entry_func. */ -static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) { struct vkd3d_shader_version version = {0}; @@ -7554,9 +7651,20 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl return; } - generate_vsir_signature(ctx, program, entry_func); + generate_vsir_signature(ctx, program, func); if (version.type == VKD3D_SHADER_TYPE_HULL) generate_vsir_signature(ctx, program, ctx->patch_constant_func); + + if (version.type == VKD3D_SHADER_TYPE_COMPUTE) + { + program->thread_group_size.x = ctx->thread_count[0]; + program->thread_group_size.y = ctx->thread_count[1]; + program->thread_group_size.z = ctx->thread_count[2]; + } + + sm4_generate_vsir_add_function(ctx, func, config_flags, program); + if (version.type == VKD3D_SHADER_TYPE_HULL) + sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); } static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index c937b245559..2198b828b7c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -3002,9 +3002,10 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, } register_table[] = { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, + {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADINDEX, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, @@ -3105,6 +3106,7 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s { {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, @@ -4935,42 +4937,39 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) +static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) { struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_TEMPS, - .idx = {temp_count}, + .idx = {count}, .idx_count = 1, }; write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, - uint32_t size, uint32_t comp_count) +static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) { struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, - .idx = {idx, size, comp_count}, + .idx = {temp->register_idx, temp->register_size, temp->component_count}, .idx_count = 3, }; write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) +static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size) { struct sm4_instruction instr = { .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - .idx[0] = thread_count[0], - .idx[1] = thread_count[1], - .idx[2] = thread_count[2], + .idx = {group_size->x, group_size->y, group_size->z}, .idx_count = 3, }; @@ -6449,9 +6448,28 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ write_sm4_instruction(tpf, &instr); } +static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) +{ + switch (ins->opcode) + { + case VKD3DSIH_DCL_TEMPS: + tpf_dcl_temps(tpf, ins->declaration.count); + break; + + case VKD3DSIH_DCL_INDEXABLE_TEMP: + tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); + break; + + default: + vkd3d_unreachable(); + break; + } +} + static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr; + unsigned int vsir_instr_idx; LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { @@ -6517,6 +6535,11 @@ static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_bl write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); break; + case HLSL_IR_VSIR_INSTRUCTION_REF: + vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; + tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); + break; + default: hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } @@ -6526,15 +6549,7 @@ static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_bl static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) { struct hlsl_ctx *ctx = tpf->ctx; - const struct hlsl_scope *scope; const struct hlsl_ir_var *var; - uint32_t temp_count; - - compute_liveness(ctx, func); - mark_indexable_vars(ctx, func); - temp_count = allocate_temp_registers(ctx, func); - if (ctx->result) - return; LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -6544,29 +6559,7 @@ static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_f } if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(tpf, ctx->thread_count); - - if (temp_count) - write_sm4_dcl_temps(tpf, temp_count); - - LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) - { - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) - { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) - continue; - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - continue; - - if (var->indexable) - { - unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; - unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; - - write_sm4_dcl_indexable_temp(tpf, id, size, 4); - } - } - } + tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); write_sm4_block(tpf, &func->body); -- 2.45.2