From 00ab2db3813cc8bcd0dc0edf90025365b9dac52f Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 6 Aug 2025 13:39:31 +1000 Subject: [PATCH] Updated vkd3d to 4bb880f9ed09dab9a87a56bb065f087e92a0d62c. --- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 67 ++- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 8 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 69 ++- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 456 ++++++++++++++++++-- libs/vkd3d/libs/vkd3d-shader/tpf.c | 21 + 6 files changed, 561 insertions(+), 61 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 113ac760731..3199072275b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -2062,7 +2062,7 @@ struct hlsl_ir_node *hlsl_block_add_load_component(struct hlsl_ctx *ctx, struct return &load->node; } -static struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, +static struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_load *load; @@ -2098,18 +2098,23 @@ static struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, load->sampling_dim = params->sampling_dim; if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; - return &load->node; + return load; } struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { - return append_new_instr(ctx, block, hlsl_new_resource_load(ctx, params, loc)); + struct hlsl_ir_resource_load *load = hlsl_new_resource_load(ctx, params, loc); + + if (load && load->sampling_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + hlsl_src_from_node(&load->byte_offset, hlsl_block_add_uint_constant(ctx, block, 0, loc)); + + return append_new_instr(ctx, block, &load->node); } -static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, enum hlsl_resource_store_type type, - const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, - const struct vkd3d_shader_location *loc) +static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, + enum hlsl_resource_store_type type, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, + struct hlsl_ir_node *value, uint32_t writemask, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_store *store; @@ -2117,6 +2122,7 @@ static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, enum h return NULL; init_node(&store->node, HLSL_IR_RESOURCE_STORE, NULL, loc); store->store_type = type; + store->writemask = writemask; hlsl_copy_deref(ctx, &store->resource, resource); hlsl_src_from_node(&store->coords, coords); @@ -2126,9 +2132,9 @@ static struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, enum h void hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_resource_store_type type, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, - struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) + struct hlsl_ir_node *value, uint32_t writemask, const struct vkd3d_shader_location *loc) { - append_new_instr(ctx, block, hlsl_new_resource_store(ctx, type, resource, coords, value, loc)); + append_new_instr(ctx, block, hlsl_new_resource_store(ctx, type, 
resource, coords, value, writemask, loc)); } struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count, @@ -2377,6 +2383,19 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) return false; } +bool hlsl_index_chain_has_tgsm_access(struct hlsl_ir_index *index) +{ + if (index->val.node->type == HLSL_IR_LOAD) + { + struct hlsl_ir_load *load = hlsl_ir_load(index->val.node); + return load->src.var->storage_modifiers & HLSL_STORAGE_GROUPSHARED; + } + + if (index->val.node->type == HLSL_IR_INDEX) + return hlsl_index_chain_has_tgsm_access(hlsl_ir_index(index->val.node)); + return false; +} + static struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) { @@ -2648,6 +2667,7 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, vkd3d_free(dst); return NULL; } + clone_src(map, &dst->byte_offset, &src->byte_offset); clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); clone_src(map, &dst->ddx, &src->ddx); @@ -2668,6 +2688,7 @@ static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, return NULL; init_node(&dst->node, HLSL_IR_RESOURCE_STORE, NULL, &src->node.loc); dst->store_type = src->store_type; + dst->writemask = src->writemask; if (!clone_deref(ctx, map, &dst->resource, &src->resource)) { vkd3d_free(dst); @@ -2985,6 +3006,17 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const return ret; } +struct hlsl_ir_node *hlsl_clone_instr(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) +{ + struct clone_instr_map map = {0}; + struct hlsl_ir_node *ret; + + ret = clone_instr(ctx, &map, instr); + vkd3d_free(map.instrs); + + return ret; +} + struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) @@ -3219,12 +3251,14 @@ static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl return; } - VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); - VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) { vkd3d_string_buffer_printf(buffer, "Buffer<"); } + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + vkd3d_string_buffer_printf(buffer, "StructuredBuffer<"); + } else { VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); @@ -3762,6 +3796,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru dump_deref(buffer, &load->resource); vkd3d_string_buffer_printf(buffer, ", sampler = "); dump_deref(buffer, &load->sampler); + if (load->byte_offset.node) + { + vkd3d_string_buffer_printf(buffer, ", byte_offset = "); + dump_src(buffer, &load->byte_offset); + } if (load->coords.node) { vkd3d_string_buffer_printf(buffer, ", coords = "); @@ -3800,7 +3839,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru vkd3d_string_buffer_printf(buffer, ")"); } -static void dump_ir_resource_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_resource_store *store) +static void dump_ir_resource_store(struct hlsl_ctx *ctx, + struct vkd3d_string_buffer *buffer, const struct hlsl_ir_resource_store *store) { static const char *const type_names[] = { @@ -3812,6 +3852,8 @@ static void dump_ir_resource_store(struct 
vkd3d_string_buffer *buffer, const str VKD3D_ASSERT(store->store_type < ARRAY_SIZE(type_names)); vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[store->store_type]); dump_deref(buffer, &store->resource); + if (store->writemask != VKD3DSP_WRITEMASK_ALL && type_is_single_reg(hlsl_deref_get_type(ctx, &store->resource))) + vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_writemask(store->writemask)); if (store->coords.node) { vkd3d_string_buffer_printf(buffer, ", coords = "); @@ -4024,7 +4066,7 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, break; case HLSL_IR_RESOURCE_STORE: - dump_ir_resource_store(buffer, hlsl_ir_resource_store(instr)); + dump_ir_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); break; case HLSL_IR_STRING_CONSTANT: @@ -4230,6 +4272,7 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) { hlsl_cleanup_deref(&load->sampler); hlsl_cleanup_deref(&load->resource); + hlsl_src_remove(&load->byte_offset); hlsl_src_remove(&load->coords); hlsl_src_remove(&load->lod); hlsl_src_remove(&load->ddx); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index d899c33f8a2..c3002258aa2 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -554,6 +554,7 @@ struct hlsl_ir_var uint32_t is_param : 1; uint32_t is_separated_resource : 1; uint32_t is_synthetic : 1; + uint32_t is_tgsm : 1; uint32_t has_explicit_bind_point : 1; }; @@ -893,7 +894,7 @@ struct hlsl_ir_resource_load struct hlsl_ir_node node; enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; - struct hlsl_src coords, lod, ddx, ddy, cmp, sample_index, texel_offset; + struct hlsl_src byte_offset, coords, lod, ddx, ddy, cmp, sample_index, texel_offset; enum hlsl_sampler_dim sampling_dim; }; @@ -910,6 +911,7 @@ struct hlsl_ir_resource_store enum hlsl_resource_store_type store_type; struct hlsl_deref resource; struct hlsl_src coords, value; + uint8_t writemask; }; struct hlsl_ir_store @@ -1586,7 +1588,7 @@ struct hlsl_ir_node *hlsl_block_add_resource_load(struct hlsl_ctx *ctx, struct h const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); void hlsl_block_add_resource_store(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_resource_store_type type, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, - struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *value, uint32_t writemask, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_simple_load(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var, const struct vkd3d_shader_location *loc); void hlsl_block_add_simple_store(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -1609,6 +1611,7 @@ struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc); void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); +struct hlsl_ir_node *hlsl_clone_instr(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func, const char *description, const struct hlsl_block *processed_block); @@ -1709,6 +1712,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls bool 
hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); +bool hlsl_index_chain_has_tgsm_access(struct hlsl_ir_index *index); struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_type compile_type, const char *profile_name, struct hlsl_ir_node **args, unsigned int args_count, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index d9fd43b5e78..0cdebb8a657 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -158,6 +158,7 @@ static {return KW_STATIC; } string {return KW_STRING; } String {return KW_STRING; } struct {return KW_STRUCT; } +StructuredBuffer {return KW_STRUCTUREDBUFFER; } switch {return KW_SWITCH; } tbuffer {return KW_TBUFFER; } (?i:technique) {return KW_TECHNIQUE; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 40353abd81b..66582e884fe 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -2117,7 +2117,8 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count); - hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &resource_deref, coords, rhs, &lhs->loc); + hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, + &resource_deref, coords, rhs, writemask, &lhs->loc); hlsl_cleanup_deref(&resource_deref); } else if (matrix_writemask) @@ -2566,13 +2567,10 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) "Ignoring the 'groupshared' modifier in a non-compute shader."); } - if (modifiers & HLSL_STORAGE_GROUPSHARED) - hlsl_fixme(ctx, &var->loc, "Group shared variables."); - /* Mark it as uniform. We need to do this here since synthetic * variables also get put in the global scope, but shouldn't be * considered uniforms, and we have no way of telling otherwise. 
*/ - if (!(modifiers & HLSL_STORAGE_STATIC)) + if (!(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_GROUPSHARED))) var->storage_modifiers |= HLSL_STORAGE_UNIFORM; if (stream_output) @@ -5065,13 +5063,25 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV) { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Interlocked targets must be UAV or groupshared elements."); return false; } } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_tgsm_access(hlsl_ir_index(lhs))) + { + hlsl_fixme(ctx, loc, "Interlocked operations on indexed groupshared elements."); + return false; + } + else if (lhs->type == HLSL_IR_LOAD && (hlsl_ir_load(lhs)->src.var->storage_modifiers & HLSL_STORAGE_GROUPSHARED)) + { + hlsl_init_simple_deref_from_var(&dst_deref, hlsl_ir_load(lhs)->src.var); + coords = hlsl_block_add_uint_constant(ctx, params->instrs, 0, loc); + } else { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Interlocked targets must be UAV or groupshared elements."); return false; } @@ -5637,6 +5647,7 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) case HLSL_SAMPLER_DIM_CUBEARRAY: case HLSL_SAMPLER_DIM_BUFFER: case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: /* Offset parameters not supported for these types. */ return 0; default: @@ -6302,6 +6313,7 @@ static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block struct hlsl_ir_node *offset, *rhs; struct hlsl_deref resource_deref; unsigned int value_dim; + uint32_t writemask; if (params->args_count != 2) { @@ -6323,11 +6335,12 @@ static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); rhs = add_implicit_conversion(ctx, block, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc); + writemask = vkd3d_write_mask_from_component_count(value_dim); if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object)) return false; - hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &resource_deref, offset, rhs, loc); + hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &resource_deref, offset, rhs, writemask, loc); hlsl_cleanup_deref(&resource_deref); return true; @@ -6352,7 +6365,7 @@ static bool add_so_append_method_call(struct hlsl_ctx *ctx, struct hlsl_block *b if (!(rhs = add_implicit_conversion(ctx, block, params->args[0], object->data_type->e.so.type, loc))) return false; - hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STREAM_APPEND, &so_deref, NULL, rhs, loc); + hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STREAM_APPEND, &so_deref, NULL, rhs, 0, loc); hlsl_cleanup_deref(&so_deref); return true; @@ -6373,7 +6386,7 @@ static bool add_so_restartstrip_method_call(struct hlsl_ctx *ctx, struct hlsl_bl if (!hlsl_init_deref_from_index_chain(ctx, &so_deref, object)) return false; - hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STREAM_RESTART, &so_deref, NULL, NULL, loc); + hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STREAM_RESTART, &so_deref, NULL, NULL, 0, loc); hlsl_cleanup_deref(&so_deref); return true; @@ -6554,19 +6567,25 @@ static bool add_object_property_access(struct hlsl_ctx *ctx, 
return false; } -static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type *format, - const struct vkd3d_shader_location *loc) +static void validate_texture_format_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct hlsl_type *format, const struct vkd3d_shader_location *loc) { - if (format->class > HLSL_CLASS_VECTOR) - { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string; - string = hlsl_type_to_string(ctx, format); - if (string) + if (!(string = hlsl_type_to_string(ctx, format))) + return; + + if (dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + if (!type_contains_only_numerics(format)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Texture data type %s is not scalar or vector.", string->buffer); - hlsl_release_string_buffer(ctx, string); + "SRV type %s is not numeric.", string->buffer); } + else if (format->class > HLSL_CLASS_VECTOR) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Texture data type %s is not scalar or vector.", string->buffer); + + hlsl_release_string_buffer(ctx, string); } static bool check_continue(struct hlsl_ctx *ctx, const struct hlsl_scope *scope, const struct vkd3d_shader_location *loc) @@ -6834,6 +6853,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_STATIC %token KW_STRING %token KW_STRUCT +%token KW_STRUCTUREDBUFFER %token KW_SWITCH %token KW_TBUFFER %token KW_TECHNIQUE @@ -7921,6 +7941,10 @@ texture_type: { $$ = HLSL_SAMPLER_DIM_BUFFER; } + | KW_STRUCTUREDBUFFER + { + $$ = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; + } | KW_TEXTURE1D { $$ = HLSL_SAMPLER_DIM_1D; @@ -8144,16 +8168,19 @@ type_no_void: } | texture_type { + if ($1 == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Structured buffer type requires an explicit format."); $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); } | texture_type '<' resource_format '>' { - validate_texture_format_type(ctx, $3, &@3); + validate_texture_format_type(ctx, $1, $3, &@3); $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } | texture_ms_type '<' resource_format '>' { - validate_texture_format_type(ctx, $3, &@3); + validate_texture_format_type(ctx, $1, $3, &@3); $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 5a11547e7a1..dbda72eb30f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -94,6 +94,134 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str return base_offset; } +static unsigned int base_type_get_byte_size(enum hlsl_base_type t) +{ + switch (t) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_MIN16UINT: + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + return 4; + + case HLSL_TYPE_DOUBLE: + return 8; + } + + return 0; +} + +static unsigned int hlsl_type_get_packed_alignment(const struct hlsl_type *type) +{ + unsigned int max_align, i; + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + return base_type_get_byte_size(type->e.numeric.type); + + case HLSL_CLASS_ARRAY: + return hlsl_type_get_packed_alignment(type->e.array.type); + + case HLSL_CLASS_STRUCT: + for (i = 0, max_align = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = &type->e.record.fields[i]; + + max_align = max(max_align, 
hlsl_type_get_packed_alignment(field->type)); + } + + return max_align; + + default: + vkd3d_unreachable(); + } +} + +static unsigned int hlsl_type_get_packed_size(const struct hlsl_type *type) +{ + unsigned int size, i; + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + return type->e.numeric.dimx * base_type_get_byte_size(type->e.numeric.type); + + case HLSL_CLASS_MATRIX: + return type->e.numeric.dimx * type->e.numeric.dimy * base_type_get_byte_size(type->e.numeric.type); + + case HLSL_CLASS_ARRAY: + return type->e.array.elements_count * hlsl_type_get_packed_size(type->e.array.type); + + case HLSL_CLASS_STRUCT: + for (i = 0, size = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = &type->e.record.fields[i]; + + size = align(size, hlsl_type_get_packed_alignment(field->type)) + + hlsl_type_get_packed_size(field->type); + } + size = align(size, hlsl_type_get_packed_alignment(type)); + + return size; + + default: + vkd3d_unreachable(); + } +} + +static struct hlsl_ir_node *hlsl_block_add_packed_index_offset_append(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct hlsl_ir_node *prev_offset, struct hlsl_ir_node *idx, + struct hlsl_type *type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *idx_offset = NULL, *c; + unsigned int field_idx, offset, size, i; + + switch (type->class) + { + case HLSL_CLASS_VECTOR: + c = hlsl_block_add_uint_constant(ctx, block, base_type_get_byte_size(type->e.numeric.type), loc); + idx_offset = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, c, idx); + break; + + case HLSL_CLASS_MATRIX: + size = base_type_get_byte_size(type->e.numeric.type) * hlsl_type_minor_size(type); + c = hlsl_block_add_uint_constant(ctx, block, size, loc); + idx_offset = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, c, idx); + break; + + case HLSL_CLASS_ARRAY: + size = hlsl_type_get_packed_size(type->e.array.type); + c = hlsl_block_add_uint_constant(ctx, block, size, loc); + idx_offset = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, c, idx); + break; + + case HLSL_CLASS_STRUCT: + field_idx = hlsl_ir_constant(idx)->value.u[0].u; + for (i = 0, offset = 0; i < field_idx; ++i) + { + struct hlsl_struct_field *field = &type->e.record.fields[i]; + + offset = align(offset, hlsl_type_get_packed_alignment(field->type)) + + hlsl_type_get_packed_size(field->type); + } + + offset = align(offset, hlsl_type_get_packed_alignment(type->e.record.fields[field_idx].type)); + idx_offset = hlsl_block_add_uint_constant(ctx, block, offset, loc); + break; + + default: + vkd3d_unreachable(); + } + + return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, idx_offset, prev_offset); +} + /* TODO: remove when no longer needed, only used for replace_deref_path_with_offset() */ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int *offset_component, const struct vkd3d_shader_location *loc) @@ -1308,6 +1436,73 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return true; } + if (val->type == HLSL_IR_RESOURCE_LOAD) + { + struct hlsl_ir_resource_load *parent = hlsl_ir_resource_load(index->val.node); + + if (parent->sampling_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + if (hlsl_index_is_noncontiguous(index)) + { + /* For column major matrices, since we have to output a row, + * we need to emit dimx loads. 
*/ + struct hlsl_ir_node *mat = index->val.node; + struct hlsl_deref row_deref; + + if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&row_deref, var); + + for (unsigned int i = 0; i < mat->data_type->e.numeric.dimx; ++i) + { + struct hlsl_type *type = parent->node.data_type; + struct hlsl_ir_node *c, *c_offset, *idx_offset; + struct hlsl_ir_resource_load *column_load; + + c = hlsl_block_add_uint_constant(ctx, block, i, &instr->loc); + c_offset = hlsl_block_add_packed_index_offset_append(ctx, + block, parent->byte_offset.node, c, type, &instr->loc); + type = hlsl_get_element_type_from_path_index(ctx, type, c); + + idx_offset = hlsl_block_add_packed_index_offset_append(ctx, + block, c_offset, index->idx.node, type, &instr->loc); + type = hlsl_get_element_type_from_path_index(ctx, type, c_offset); + + column_load = hlsl_ir_resource_load(hlsl_clone_instr(ctx, &parent->node)); + + hlsl_src_remove(&column_load->byte_offset); + hlsl_src_from_node(&column_load->byte_offset, idx_offset); + column_load->node.data_type = type; + + hlsl_block_add_instr(block, &column_load->node); + + hlsl_block_add_store_component(ctx, block, &row_deref, i, &column_load->node); + } + + hlsl_block_add_simple_load(ctx, block, var, &instr->loc); + } + else + { + struct hlsl_type *type = parent->node.data_type; + struct hlsl_ir_resource_load *appended_load; + struct hlsl_ir_node *idx_offset; + + idx_offset = hlsl_block_add_packed_index_offset_append(ctx, block, + parent->byte_offset.node, index->idx.node, type, &instr->loc); + appended_load = hlsl_ir_resource_load(hlsl_clone_instr(ctx, &parent->node)); + type = hlsl_get_element_type_from_path_index(ctx, type, index->idx.node); + + hlsl_src_remove(&appended_load->byte_offset); + hlsl_src_from_node(&appended_load->byte_offset, idx_offset); + appended_load->node.data_type = type; + + hlsl_block_add_instr(block, &appended_load->node); + } + + return true; + } + } + if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc))) return false; hlsl_init_simple_deref_from_var(&var_deref, var); @@ -1386,6 +1581,67 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s return false; } +/* Lowers loads from TGSMs to resource loads. */ +static bool lower_tgsm_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_resource_load_params params = {.type = HLSL_RESOURCE_LOAD}; + const struct vkd3d_shader_location *loc = &instr->loc; + struct hlsl_ir_load *load; + struct hlsl_deref *deref; + + if (instr->type != HLSL_IR_LOAD || !hlsl_is_numeric_type(instr->data_type)) + return false; + load = hlsl_ir_load(instr); + deref = &load->src; + + if (!deref->var->is_tgsm) + return false; + + if (deref->path_len) + { + hlsl_fixme(ctx, &instr->loc, "Load from indexed TGSM."); + return false; + } + + params.resource = hlsl_block_add_simple_load(ctx, block, deref->var, loc); + params.format = instr->data_type; + params.coords = hlsl_block_add_uint_constant(ctx, block, 0, &instr->loc); + hlsl_block_add_resource_load(ctx, block, ¶ms, loc); + + return true; +} + +/* Lowers stores to TGSMs to resource stores. 
*/ +static bool lower_tgsm_stores(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_store *store; + struct hlsl_ir_node *coords; + struct hlsl_deref res_deref; + struct hlsl_deref *deref; + + if (instr->type != HLSL_IR_STORE) + return false; + store = hlsl_ir_store(instr); + deref = &store->lhs; + + if (!deref->var->is_tgsm) + return false; + + if (deref->path_len) + { + hlsl_fixme(ctx, &instr->loc, "Store to indexed TGSM."); + return false; + } + + hlsl_init_simple_deref_from_var(&res_deref, deref->var); + coords = hlsl_block_add_uint_constant(ctx, block, 0, &instr->loc); + + hlsl_block_add_resource_store(ctx, block, HLSL_RESOURCE_STORE, &res_deref, + coords, store->rhs.node, store->writemask, &instr->loc); + + return true; +} + /* Allocate a unique, ordered index to each instruction, which will be used for * copy propagation and computing liveness ranges. * Index 0 means unused, so start at 1. */ @@ -3217,10 +3473,10 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins { struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - if (!load->resource.var->is_uniform) + if (!load->resource.var->is_uniform && !load->resource.var->is_tgsm) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a single uniform source."); + "Loaded resource must have a single uniform or groupshared source."); } else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) { @@ -3251,10 +3507,10 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins { struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - if (!store->resource.var->is_uniform) + if (!store->resource.var->is_uniform && !store->resource.var->is_tgsm) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); + "Accessed resource must have a single uniform or groupshared source."); } else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) { @@ -3281,10 +3537,10 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins { struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr); - if (!interlocked->dst.var->is_uniform) + if (!interlocked->dst.var->is_uniform && !interlocked->dst.var->is_tgsm) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); + "Accessed resource must have a single uniform or groupshared source."); } else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT) { @@ -3482,7 +3738,30 @@ static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst hlsl_src_remove(&store->value); return true; +} +static void split_resource_load(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + struct hlsl_ir_resource_load *load, const unsigned int idx, struct hlsl_type *type) +{ + struct hlsl_ir_resource_load *vector_load; + struct hlsl_ir_node *c, *idx_offset; + struct hlsl_block block; + + hlsl_block_init(&block); + + c = hlsl_block_add_uint_constant(ctx, &block, idx, &store->node.loc); + idx_offset = hlsl_block_add_packed_index_offset_append(ctx, &block, + load->byte_offset.node, c, load->node.data_type, &store->node.loc); + + vector_load = hlsl_ir_resource_load(hlsl_clone_instr(ctx, &load->node)); + 
hlsl_src_remove(&vector_load->byte_offset); + hlsl_src_from_node(&vector_load->byte_offset, idx_offset); + vector_load->node.data_type = type; + hlsl_block_add_instr(&block, &vector_load->node); + + hlsl_block_add_store_index(ctx, &block, &store->lhs, c, &vector_load->node, 0, &store->node.loc); + + list_move_before(&store->node.entry, &block.instrs); } static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -3503,16 +3782,32 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; element_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - if (rhs->type != HLSL_IR_LOAD) + if (rhs->type != HLSL_IR_LOAD && rhs->type != HLSL_IR_RESOURCE_LOAD) { hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); return false; } - for (i = 0; i < hlsl_type_major_size(type); ++i) + if (rhs->type == HLSL_IR_RESOURCE_LOAD) { - if (!split_copy(ctx, store, hlsl_ir_load(rhs), i, element_type)) - return false; + /* As we forbid non-scalar or vector types in non-structured resource + * loads, this is specific to structured buffer loads. */ + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(rhs); + + VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER); + + for (i = 0; i < hlsl_type_major_size(type); ++i) + { + split_resource_load(ctx, store, load, i, element_type); + } + } + else + { + for (i = 0; i < hlsl_type_major_size(type); ++i) + { + if (!split_copy(ctx, store, hlsl_ir_load(rhs), i, element_type)) + return false; + } } list_remove(&store->node.entry); @@ -5486,6 +5781,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop deref_mark_last_read(&load->sampler, last_read); } + if (load->byte_offset.node) + load->byte_offset.node->last_read = last_read; if (load->coords.node) load->coords.node->last_read = last_read; if (load->texel_offset.node) @@ -5867,6 +6164,9 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls load = hlsl_ir_resource_load(instr); var = load->resource.var; + if (var->is_tgsm) + return false; + regset = hlsl_deref_get_regset(ctx, &load->resource); if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) return false; @@ -5935,7 +6235,7 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in { struct hlsl_ir_load *load = hlsl_ir_load(instr); - if (!load->src.var->is_uniform) + if (!load->src.var->is_uniform && !load->src.var->is_tgsm) return false; /* These will are handled by validate_static_object_references(). 
*/ @@ -6459,7 +6759,7 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_block { LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) { - if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) + if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform || var->is_tgsm)) memset(var->regs, 0, sizeof(var->regs)); } } @@ -7109,6 +7409,27 @@ static void allocate_stream_outputs(struct hlsl_ctx *ctx) } } +static void allocate_tgsms(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_reg *reg; + uint32_t index = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->is_tgsm || !var->bind_count[HLSL_REGSET_NUMERIC]) + continue; + + reg = &var->regs[HLSL_REGSET_NUMERIC]; + reg->space = 0; + reg->index = index; + reg->id = index; + reg->allocated = true; + + ++index; + } +} + bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count) { @@ -8700,6 +9021,15 @@ static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_p *writemask = hlsl_reg.writemask; } } + else if (var->is_tgsm) + { + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + reg->type = VKD3DSPR_GROUPSHAREDMEM; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->idx_count = 1; + *writemask = (1u << data_type->e.numeric.dimx) - 1; + } else { return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); @@ -10906,6 +11236,8 @@ static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins; + VKD3D_ASSERT(!store->lhs.var->is_tgsm); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_MOV, 1, 1))) return false; @@ -10940,6 +11272,7 @@ static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_progr struct vkd3d_shader_instruction *ins; struct hlsl_constant_value value; + VKD3D_ASSERT(!load->src.var->is_tgsm); VKD3D_ASSERT(hlsl_is_numeric_type(type)); if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) { @@ -10984,8 +11317,8 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; struct hlsl_ir_node *instr = &store->node; + bool tgsm = store->resource.var->is_tgsm; struct vkd3d_shader_instruction *ins; - unsigned int writemask; if (store->store_type != HLSL_RESOURCE_STORE) { @@ -11012,9 +11345,9 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, return true; } - if (!store->resource.var->is_uniform) + if (!store->resource.var->is_uniform && !tgsm) { - hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform non-groupshared resource variable."); return false; } @@ -11024,14 +11357,19 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, return false; } - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + if (tgsm && !hlsl_is_numeric_type(resource_type)) + { + hlsl_fixme(ctx, &store->node.loc, "Store to structured TGSM."); + return false; + } + + if (tgsm || resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) { if (!(ins = 
generate_vsir_add_program_instruction(ctx, program, &instr->loc, VSIR_OP_STORE_RAW, 1, 2))) return false; - writemask = vkd3d_write_mask_from_component_count(value->data_type->e.numeric.dimx); - if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, - &ins->dst[0], &store->resource, &instr->loc, writemask)) + if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, &ins->dst[0], + &store->resource, &instr->loc, store->writemask)) return false; } else @@ -11092,7 +11430,6 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); const struct vkd3d_shader_version *version = &program->shader_version; - bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; const struct hlsl_ir_node *sample_index = load->sample_index.node; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; @@ -11100,16 +11437,33 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, const struct hlsl_deref *resource = &load->resource; const struct hlsl_ir_node *instr = &load->node; enum hlsl_sampler_dim dim = load->sampling_dim; + bool tgsm = load->resource.var->is_tgsm; struct vkd3d_shader_instruction *ins; enum vkd3d_shader_opcode opcode; - bool multisampled; + bool multisampled, raw; VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &load->node.loc, "Structured buffer loads."); + return false; + } + multisampled = resource_type->class == HLSL_CLASS_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + if (!tgsm) + { + raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; + } + else if (!(raw = hlsl_is_numeric_type(resource_type))) + { + hlsl_fixme(ctx, &load->node.loc, "Load from structured TGSM."); + return false; + } + if (uav) opcode = VSIR_OP_LD_UAV_TYPED; else if (raw) @@ -11130,7 +11484,7 @@ static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - if (!uav) + if (!uav && !tgsm) { /* Mipmap level is in the last component in the IR, but needs to be in * the W component in the instruction. 
*/ @@ -11401,7 +11755,7 @@ static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, return false; } - if (!load->resource.var->is_uniform) + if (!load->resource.var->is_uniform && !load->resource.var->is_tgsm) { hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); return false; @@ -11761,7 +12115,7 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, struct list *se { LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + if (var->is_uniform || var->is_tgsm || var->is_input_semantic || var->is_output_semantic) continue; if (!var->regs[HLSL_REGSET_NUMERIC].allocated) continue; @@ -12161,6 +12515,9 @@ static enum vsir_data_type sm4_generate_vsir_get_format_type(const struct hlsl_t { const struct hlsl_type *format = type->e.resource.format; + if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + return VSIR_DATA_MIXED; + switch (format->e.numeric.type) { case HLSL_TYPE_DOUBLE: @@ -12299,6 +12656,34 @@ static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, } } +static void sm4_generate_vsir_add_dcl_tgsm(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_var *var) +{ + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (!hlsl_is_numeric_type(var->data_type)) + { + hlsl_fixme(ctx, &var->loc, "Structured TGSM declaration."); + return; + } + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &var->loc, VSIR_OP_DCL_TGSM_RAW, 0, 0))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + dst_param = &ins->declaration.tgsm_raw.reg; + + vsir_dst_param_init(dst_param, VKD3DSPR_GROUPSHAREDMEM, VSIR_DATA_F32, 1); + dst_param->reg.dimension = VSIR_DIMENSION_NONE; + dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + + ins->declaration.tgsm_raw.byte_count = var->data_type->reg_size[HLSL_REGSET_NUMERIC] * 4; + ins->declaration.tgsm_raw.zero_init = false; +} + static void sm4_generate_vsir_add_dcl_stream(struct hlsl_ctx *ctx, struct vsir_program *program, const struct hlsl_ir_var *var) { @@ -12327,6 +12712,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct extern_resource *extern_resources; unsigned int extern_resources_count; const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; if (version->type == VKD3D_SHADER_TYPE_COMPUTE) { @@ -12377,10 +12763,14 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, } sm4_free_extern_resources(extern_resources, extern_resources_count); - if (version->type == VKD3D_SHADER_TYPE_GEOMETRY && version->major >= 5) + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - const struct hlsl_ir_var *var; + if (var->is_tgsm && var->regs[HLSL_REGSET_NUMERIC].allocated) + sm4_generate_vsir_add_dcl_tgsm(ctx, program, var); + } + if (version->type == VKD3D_SHADER_TYPE_GEOMETRY && version->major >= 5) + { LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->bind_count[HLSL_REGSET_STREAM_OUTPUTS]) @@ -12489,6 +12879,9 @@ static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type) { const struct hlsl_type *format = type->e.resource.format; + if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + return D3D_RETURN_TYPE_MIXED; + switch (format->e.numeric.type) { case HLSL_TYPE_DOUBLE: @@ -13594,6 +13987,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v 
lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body); + lower_ir(ctx, lower_tgsm_loads, body); + lower_ir(ctx, lower_tgsm_stores, body); + if (entry_func->return_var) { if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) @@ -13903,7 +14299,14 @@ int hlsl_emit_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + { prepend_uniform_copy(ctx, &global_uniform_block, var); + } + else if (var->storage_modifiers & HLSL_STORAGE_GROUPSHARED) + { + var->is_tgsm = 1; + list_add_tail(&ctx->extern_vars, &var->extern_entry); + } } process_entry_function(ctx, &semantic_vars, &body, &global_uniform_block, entry_func); @@ -13932,6 +14335,7 @@ int hlsl_emit_vsir(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info allocate_objects(ctx, &semantic_vars, HLSL_REGSET_TEXTURES); allocate_objects(ctx, &semantic_vars, HLSL_REGSET_UAVS); allocate_objects(ctx, &semantic_vars, HLSL_REGSET_SAMPLERS); + allocate_tgsms(ctx); } if (TRACE_ON()) diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index ed19faf945b..ea15c1a9ad5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -3910,6 +3910,23 @@ static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_s write_sm4_instruction(tpf, &instr); } +static void tpf_dcl_tgsm_raw(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_tgsm_raw *tgsm = &ins->declaration.tgsm_raw; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TGSM_RAW, + + .dsts[0] = tgsm->reg, + .dst_count = 1, + + .idx[0] = tgsm->byte_count, + .idx_count = 1, + }; + + write_sm4_instruction(tpf, &instr); +} + static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) { struct sm4_instruction instr = @@ -4199,6 +4216,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ tpf_dcl_sampler(tpf, ins); break; + case VSIR_OP_DCL_TGSM_RAW: + tpf_dcl_tgsm_raw(tpf, ins); + break; + case VSIR_OP_DCL: case VSIR_OP_DCL_RESOURCE_RAW: case VSIR_OP_DCL_UAV_RAW: -- 2.50.1
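
For reference, the kind of HLSL this vkd3d update is aimed at is sketched below. This is an illustrative compute shader, not part of the patch: the entry point, register bindings, thread-group size and all names are arbitrary, and some constructs (for example the final SM4 lowering of structured buffer loads, and indexed or struct-typed groupshared variables) still hit hlsl_fixme() paths in this revision.

    struct record
    {
        float4 pos;
        float weight;
    };

    StructuredBuffer<record> data : register(t0);   /* new KW_STRUCTUREDBUFFER type */
    RWBuffer<float> result : register(u0);

    groupshared uint counter;                       /* marked is_tgsm, allocated by allocate_tgsms() */

    [numthreads(64, 1, 1)]
    void main(uint3 id : SV_DispatchThreadID, uint gi : SV_GroupIndex)
    {
        /* Store to a non-indexed groupshared scalar; lowered to a raw
         * TGSM resource store by lower_tgsm_stores(). */
        if (gi == 0)
            counter = 0;
        GroupMemoryBarrierWithGroupSync();

        /* Structured buffer field access; the byte offset is computed via
         * hlsl_block_add_packed_index_offset_append(). InterlockedAdd on a
         * non-indexed groupshared scalar goes through the extended
         * intrinsic_interlocked() path instead of being rejected. */
        if (data[id.x].weight > 0.0f)
            InterlockedAdd(counter, 1);
        GroupMemoryBarrierWithGroupSync();

        /* Load from groupshared memory; lowered to a raw TGSM resource
         * load by lower_tgsm_loads(). */
        if (gi == 0)
            result[id.x / 64] = counter;
    }

Under the packed-layout helpers added here (hlsl_type_get_packed_size() and hlsl_type_get_packed_alignment()), record above packs to 20 bytes — pos at byte 0, weight at byte 16 — so data[i].weight resolves to the byte offset i * 20 + 16.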