wine-staging/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch
2024-11-01 18:00:55 +11:00

736 lines
31 KiB
Diff

From 4cffbfb94f89317c6e5ce5f7c6aa488e67a8451d Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Fri, 25 Oct 2024 07:38:01 +1100
Subject: [PATCH] Updated vkd3d to 5eff8bf9188c401cc31ce14d42798dc3751377bd.
---
libs/vkd3d/libs/vkd3d-shader/glsl.c | 112 ++++++++++++++-----
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 29 +++++
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 24 +++-
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2 +
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 118 +++++++++++++++++++-
libs/vkd3d/libs/vkd3d-shader/tpf.c | 79 ++++++-------
6 files changed, 282 insertions(+), 82 deletions(-)
diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
index a2a090e1c21..363054cb6d9 100644
--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
@@ -20,8 +20,14 @@
struct glsl_resource_type_info
{
+ /* The number of coordinates needed to sample the resource type. */
size_t coord_size;
+ /* Whether the resource type is an array type. */
+ bool array;
+ /* Whether the resource type has a shadow/comparison variant. */
bool shadow;
+ /* The type suffix for resource type. I.e., the "2D" part of "usampler2D"
+ * or "iimage2D". */
const char *type_suffix;
};
@@ -102,17 +108,17 @@ static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info(
{
static const struct glsl_resource_type_info info[] =
{
- {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */
- {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */
- {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */
- {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */
- {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */
- {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */
- {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */
- {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */
- {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */
- {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */
- {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */
+ {0, 0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */
+ {1, 0, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */
+ {1, 0, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */
+ {2, 0, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */
+ {2, 0, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */
+ {3, 0, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */
+ {3, 0, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */
+ {2, 1, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */
+ {3, 1, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */
+ {3, 1, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */
+ {4, 1, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */
};
if (!t || t >= ARRAY_SIZE(info))
@@ -862,17 +868,24 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s
static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
{
+ bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow;
const struct glsl_resource_type_info *resource_type_info;
unsigned int resource_id, resource_idx, resource_space;
unsigned int sampler_id, sampler_idx, sampler_space;
const struct vkd3d_shader_descriptor_info1 *d;
enum vkd3d_shader_component_type sampled_type;
enum vkd3d_shader_resource_type resource_type;
+ unsigned int component_idx, coord_size;
struct vkd3d_string_buffer *sample;
enum vkd3d_data_type data_type;
- unsigned int coord_size;
struct glsl_dst dst;
- bool shadow;
+
+ bias = ins->opcode == VKD3DSIH_SAMPLE_B;
+ gather = ins->opcode == VKD3DSIH_GATHER4;
+ grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD;
+ lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+ lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+ shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
if (vkd3d_shader_instruction_has_texel_offset(ins))
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
@@ -904,12 +917,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
{
coord_size = resource_type_info->coord_size;
+ array = resource_type_info->array;
}
else
{
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
"Internal compiler error: Unhandled resource type %#x.", resource_type);
coord_size = 2;
+ array = false;
}
sampler_id = ins->src[2].reg.idx[0].offset;
@@ -917,17 +932,17 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id)))
{
sampler_space = d->register_space;
- shadow = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE;
+ shadow_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE;
- if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ)
+ if (shadow)
{
- if (!shadow)
+ if (!shadow_sampler)
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
"Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id);
}
else
{
- if (shadow)
+ if (shadow_sampler)
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
"Internal compiler error: Sampler %u is a comparison sampler.", sampler_id);
}
@@ -942,26 +957,44 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
sample = vkd3d_string_buffer_get(&gen->string_buffers);
- if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ)
+ if (gather)
+ vkd3d_string_buffer_printf(sample, "textureGather(");
+ else if (grad)
+ vkd3d_string_buffer_printf(sample, "textureGrad(");
+ else if (lod)
vkd3d_string_buffer_printf(sample, "textureLod(");
else
vkd3d_string_buffer_printf(sample, "texture(");
shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space);
vkd3d_string_buffer_printf(sample, ", ");
- if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ)
+ if (shadow)
shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size);
else
shader_glsl_print_src(sample, gen, &ins->src[0],
vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type);
- if (ins->opcode == VKD3DSIH_SAMPLE_B)
+ if (grad)
{
vkd3d_string_buffer_printf(sample, ", ");
- shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
+ shader_glsl_print_src(sample, gen, &ins->src[3],
+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[3].reg.data_type);
+ vkd3d_string_buffer_printf(sample, ", ");
+ shader_glsl_print_src(sample, gen, &ins->src[4],
+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[4].reg.data_type);
}
- else if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ)
+ else if (lod_zero)
{
vkd3d_string_buffer_printf(sample, ", 0.0");
}
+ else if (bias || lod)
+ {
+ vkd3d_string_buffer_printf(sample, ", ");
+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
+ }
+ if (gather)
+ {
+ if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0)))
+ vkd3d_string_buffer_printf(sample, ", %d", component_idx);
+ }
vkd3d_string_buffer_printf(sample, ")");
shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask);
@@ -1465,6 +1498,15 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
case VKD3DSIH_FTOU:
shader_glsl_cast(gen, ins, "uint", "uvec");
break;
+ case VKD3DSIH_GATHER4:
+ case VKD3DSIH_SAMPLE:
+ case VKD3DSIH_SAMPLE_B:
+ case VKD3DSIH_SAMPLE_C:
+ case VKD3DSIH_SAMPLE_C_LZ:
+ case VKD3DSIH_SAMPLE_GRAD:
+ case VKD3DSIH_SAMPLE_LOD:
+ shader_glsl_sample(gen, ins);
+ break;
case VKD3DSIH_GEO:
case VKD3DSIH_IGE:
shader_glsl_relop(gen, ins, ">=", "greaterThanEqual");
@@ -1482,9 +1524,11 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
break;
case VKD3DSIH_IMAX:
case VKD3DSIH_MAX:
+ case VKD3DSIH_UMAX:
shader_glsl_intrinsic(gen, ins, "max");
break;
case VKD3DSIH_MIN:
+ case VKD3DSIH_UMIN:
shader_glsl_intrinsic(gen, ins, "min");
break;
case VKD3DSIH_IMUL:
@@ -1553,12 +1597,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
case VKD3DSIH_RSQ:
shader_glsl_intrinsic(gen, ins, "inversesqrt");
break;
- case VKD3DSIH_SAMPLE:
- case VKD3DSIH_SAMPLE_B:
- case VKD3DSIH_SAMPLE_C:
- case VKD3DSIH_SAMPLE_C_LZ:
- shader_glsl_sample(gen, ins);
- break;
case VKD3DSIH_SQRT:
shader_glsl_intrinsic(gen, ins, "sqrt");
break;
@@ -2197,6 +2235,20 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator
}
}
+static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer *buffer,
+ struct vkd3d_glsl_generator *gen, enum vsir_global_flags flags)
+{
+ if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL)
+ {
+ vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n");
+ flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
+ }
+
+ if (flags)
+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags);
+}
+
static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen)
{
const struct vsir_program *program = gen->program;
@@ -2210,9 +2262,7 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen)
group_size->x, group_size->y, group_size->z);
}
- if (program->global_flags)
- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
- "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)program->global_flags);
+ shader_glsl_handle_global_flags(buffer, gen, program->global_flags);
shader_glsl_generate_descriptor_declarations(gen);
shader_glsl_generate_input_declarations(gen);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index c7aa148ea11..cafff2fa878 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -1695,6 +1695,22 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *
return &s->node;
}
+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx,
+ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_vsir_instruction_ref *vsir_instr;
+
+ if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr))))
+ return NULL;
+ init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc);
+ vsir_instr->vsir_instr_idx = vsir_instr_idx;
+
+ if (reg)
+ vsir_instr->node.reg = *reg;
+
+ return &vsir_instr->node;
+}
+
struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc)
{
@@ -2517,6 +2533,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_STATEBLOCK_CONSTANT:
return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr));
+
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ vkd3d_unreachable();
}
vkd3d_unreachable();
@@ -2938,6 +2957,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
[HLSL_IR_COMPILE] = "HLSL_IR_COMPILE",
[HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE",
[HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT",
+ [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF",
};
if (type >= ARRAY_SIZE(names))
@@ -3531,6 +3551,11 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
case HLSL_IR_STATEBLOCK_CONSTANT:
dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr));
break;
+
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u",
+ hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx);
+ break;
}
}
@@ -3839,6 +3864,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
case HLSL_IR_STATEBLOCK_CONSTANT:
free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node));
break;
+
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ vkd3d_free(hlsl_ir_vsir_instruction_ref(node));
+ break;
}
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index e234cd0ba40..ae7f8c1c04f 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -328,6 +328,8 @@ enum hlsl_ir_node_type
HLSL_IR_COMPILE,
HLSL_IR_SAMPLER_STATE,
HLSL_IR_STATEBLOCK_CONSTANT,
+
+ HLSL_IR_VSIR_INSTRUCTION_REF,
};
/* Common data for every type of IR instruction node. */
@@ -930,6 +932,16 @@ struct hlsl_ir_stateblock_constant
char *name;
};
+/* A vkd3d_shader_instruction that can be inserted in a hlsl_block.
+ * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */
+struct hlsl_ir_vsir_instruction_ref
+{
+ struct hlsl_ir_node node;
+
+ /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */
+ unsigned int vsir_instr_idx;
+};
+
struct hlsl_scope
{
/* Item entry for hlsl_ctx.scopes. */
@@ -1245,6 +1257,12 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co
return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node);
}
+static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node)
+{
+ VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF);
+ return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node);
+}
+
static inline void hlsl_block_init(struct hlsl_block *block)
{
list_init(&block->instrs);
@@ -1433,9 +1451,6 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body);
void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body);
-uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
-void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
-void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out);
@@ -1570,6 +1585,9 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned
struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector,
struct list *cases, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx,
+ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc);
+
void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index 49cff4c81b8..cd938fd5906 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -672,6 +672,8 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
"Expected literal expression.");
break;
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ vkd3d_unreachable();
}
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 6e1b2b437b0..2cb56d6b493 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -4162,6 +4162,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
vkd3d_unreachable();
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */
+ vkd3d_unreachable();
}
return false;
@@ -4193,7 +4196,7 @@ static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
return true;
}
-void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
+static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
struct hlsl_scope *scope;
struct hlsl_ir_var *var;
@@ -4301,6 +4304,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
vkd3d_unreachable();
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */
+ vkd3d_unreachable();
case HLSL_IR_STORE:
{
@@ -4441,7 +4447,7 @@ static void init_var_liveness(struct hlsl_ir_var *var)
var->last_read = UINT_MAX;
}
-void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
+static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
struct hlsl_scope *scope;
struct hlsl_ir_var *var;
@@ -5222,7 +5228,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
* index to all (simultaneously live) variables or intermediate values. Agnostic
* as to how many registers are actually available for the current backend, and
* does not handle constants. */
-uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
+static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
struct register_allocator allocator = {0};
struct hlsl_scope *scope;
@@ -7536,10 +7542,101 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
sm1_generate_vsir_block(ctx, &entry_func->body, program);
}
+static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block)
+{
+ struct vkd3d_shader_location *loc;
+ struct hlsl_ir_node *vsir_instr;
+
+ loc = &program->instructions.elements[program->instructions.count - 1].location;
+
+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+ hlsl_block_add_instr(block, vsir_instr);
+}
+
+static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
+ uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
+{
+ struct vkd3d_shader_instruction *ins;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0)))
+ return;
+
+ ins->declaration.count = temp_count;
+
+ add_last_vsir_instr_to_block(ctx, program, block);
+}
+
+static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_block *block, uint32_t idx,
+ uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc)
+{
+ struct vkd3d_shader_instruction *ins;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0)))
+ return;
+
+ ins->declaration.indexable_temp.register_idx = idx;
+ ins->declaration.indexable_temp.register_size = size;
+ ins->declaration.indexable_temp.alignment = 0;
+ ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT;
+ ins->declaration.indexable_temp.component_count = comp_count;
+ ins->declaration.indexable_temp.has_function_scope = false;
+
+ add_last_vsir_instr_to_block(ctx, program, block);
+}
+
+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
+{
+ struct hlsl_block block = {0};
+ struct hlsl_scope *scope;
+ struct hlsl_ir_var *var;
+ uint32_t temp_count;
+
+ compute_liveness(ctx, func);
+ mark_indexable_vars(ctx, func);
+ temp_count = allocate_temp_registers(ctx, func);
+ if (ctx->result)
+ return;
+ program->temp_count = max(program->temp_count, temp_count);
+
+ hlsl_block_init(&block);
+
+ if (temp_count)
+ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc);
+
+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
+ {
+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
+ {
+ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
+ continue;
+ if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
+ continue;
+
+ if (var->indexable)
+ {
+ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
+ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
+
+ sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc);
+ }
+ }
+ }
+
+ list_move_head(&func->body.instrs, &block.instrs);
+
+ hlsl_block_cleanup(&block);
+}
+
/* OBJECTIVE: Translate all the information from ctx and entry_func to the
* vsir_program, so it can be used as input to tpf_compile() without relying
* on ctx and entry_func. */
-static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
uint64_t config_flags, struct vsir_program *program)
{
struct vkd3d_shader_version version = {0};
@@ -7554,9 +7651,20 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
return;
}
- generate_vsir_signature(ctx, program, entry_func);
+ generate_vsir_signature(ctx, program, func);
if (version.type == VKD3D_SHADER_TYPE_HULL)
generate_vsir_signature(ctx, program, ctx->patch_constant_func);
+
+ if (version.type == VKD3D_SHADER_TYPE_COMPUTE)
+ {
+ program->thread_group_size.x = ctx->thread_count[0];
+ program->thread_group_size.y = ctx->thread_count[1];
+ program->thread_group_size.z = ctx->thread_count[2];
+ }
+
+ sm4_generate_vsir_add_function(ctx, func, config_flags, program);
+ if (version.type == VKD3D_SHADER_TYPE_HULL)
+ sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program);
}
static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point,
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index c937b245559..2198b828b7c 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -3002,9 +3002,10 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
}
register_table[] =
{
- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false},
- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false},
- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false},
+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false},
+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false},
+ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADINDEX, false},
+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false},
{"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false},
{"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false},
@@ -3105,6 +3106,7 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s
{
{"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
{"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
+ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
{"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
{"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u},
@@ -4935,42 +4937,39 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf,
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count)
+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
{
struct sm4_instruction instr =
{
.opcode = VKD3D_SM4_OP_DCL_TEMPS,
- .idx = {temp_count},
+ .idx = {count},
.idx_count = 1,
};
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx,
- uint32_t size, uint32_t comp_count)
+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp)
{
struct sm4_instruction instr =
{
.opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
- .idx = {idx, size, comp_count},
+ .idx = {temp->register_idx, temp->register_size, temp->component_count},
.idx_count = 3,
};
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3])
+static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size)
{
struct sm4_instruction instr =
{
.opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
- .idx[0] = thread_count[0],
- .idx[1] = thread_count[1],
- .idx[2] = thread_count[2],
+ .idx = {group_size->x, group_size->y, group_size->z},
.idx_count = 3,
};
@@ -6449,9 +6448,28 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_
write_sm4_instruction(tpf, &instr);
}
+static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
+{
+ switch (ins->opcode)
+ {
+ case VKD3DSIH_DCL_TEMPS:
+ tpf_dcl_temps(tpf, ins->declaration.count);
+ break;
+
+ case VKD3DSIH_DCL_INDEXABLE_TEMP:
+ tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp);
+ break;
+
+ default:
+ vkd3d_unreachable();
+ break;
+ }
+}
+
static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block)
{
const struct hlsl_ir_node *instr;
+ unsigned int vsir_instr_idx;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
{
@@ -6517,6 +6535,11 @@ static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_bl
write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr));
break;
+ case HLSL_IR_VSIR_INSTRUCTION_REF:
+ vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx;
+ tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]);
+ break;
+
default:
hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
}
@@ -6526,15 +6549,7 @@ static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_bl
static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func)
{
struct hlsl_ctx *ctx = tpf->ctx;
- const struct hlsl_scope *scope;
const struct hlsl_ir_var *var;
- uint32_t temp_count;
-
- compute_liveness(ctx, func);
- mark_indexable_vars(ctx, func);
- temp_count = allocate_temp_registers(ctx, func);
- if (ctx->result)
- return;
LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
{
@@ -6544,29 +6559,7 @@ static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_f
}
if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE)
- write_sm4_dcl_thread_group(tpf, ctx->thread_count);
-
- if (temp_count)
- write_sm4_dcl_temps(tpf, temp_count);
-
- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
- {
- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
- {
- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
- continue;
- if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
- continue;
-
- if (var->indexable)
- {
- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
-
- write_sm4_dcl_indexable_temp(tpf, id, size, 4);
- }
- }
- }
+ tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size);
write_sm4_block(tpf, &func->body);
--
2.45.2