From 874ca85a9529106eb1004492b1008e865a1406b4 Mon Sep 17 00:00:00 2001
From: Francisco Casas <fcasas@codeweavers.com>
Date: Wed, 6 Nov 2024 19:53:58 -0300
Subject: [PATCH] vkd3d-shader/hlsl: Store SM4 stores in the vsir program.

---
 libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++-
 libs/vkd3d-shader/tpf.c          |  23 ---
 2 files changed, 253 insertions(+), 24 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index d11ff481..8d49cdd7 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -19,6 +19,7 @@
  */
 
 #include "hlsl.h"
+#include "vkd3d_shader_private.h"
 #include <stdio.h>
 #include <math.h>
 
@@ -6812,7 +6813,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src
 }
 
 static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
-        struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask)
+        struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask)
 {
     struct hlsl_ir_constant *constant;
 
@@ -6832,6 +6833,230 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
     }
 }
 
+static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+        struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
+{
+    const struct hlsl_ir_var *var = deref->var;
+    unsigned int offset_const_deref;
+
+    reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
+    reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
+    reg->dimension = VSIR_DIMENSION_VEC4;
+
+    VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
+
+    if (!var->indexable)
+    {
+        offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
+        reg->idx[0].offset += offset_const_deref / 4;
+        reg->idx_count = 1;
+    }
+    else
+    {
+        offset_const_deref = deref->const_offset;
+        reg->idx[1].offset = offset_const_deref / 4;
+        reg->idx_count = 2;
+
+        if (deref->rel_offset.node)
+        {
+            struct vkd3d_shader_src_param *idx_src;
+
+            if (!(idx_src = vsir_program_get_src_params(program, 1)))
+            {
+                ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+                return false;
+            }
+            memset(idx_src, 0, sizeof(*idx_src));
+            reg->idx[1].rel_addr = idx_src;
+
+            vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL);
+        }
+    }
+
+    *writemask = 0xf & (0xf << (offset_const_deref % 4));
+    if (var->regs[HLSL_REGSET_NUMERIC].writemask)
+        *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
+    return true;
+}
+
+static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+        struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
+{
+    const struct vkd3d_shader_version *version = &program->shader_version;
+    const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref);
+    const struct hlsl_ir_var *var = deref->var;
+
+    if (var->is_uniform)
+    {
+        enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
+
+        if (regset == HLSL_REGSET_TEXTURES)
+        {
+            reg->type = VKD3DSPR_RESOURCE;
+            reg->dimension = VSIR_DIMENSION_VEC4;
+            if (vkd3d_shader_ver_ge(version, 5, 1))
+            {
+                reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
+                reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
+                reg->idx_count = 2;
+            }
+            else
+            {
+                reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index;
+                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
+                reg->idx_count = 1;
+            }
+            VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES);
+            *writemask = VKD3DSP_WRITEMASK_ALL;
+        }
+        else if (regset == HLSL_REGSET_UAVS)
+        {
+            reg->type = VKD3DSPR_UAV;
+            reg->dimension = VSIR_DIMENSION_VEC4;
+            if (vkd3d_shader_ver_ge(version, 5, 1))
+            {
+                reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
+                reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
+                reg->idx_count = 2;
+            }
+            else
+            {
+                reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index;
+                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
+                reg->idx_count = 1;
+            }
+            VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
+            *writemask = VKD3DSP_WRITEMASK_ALL;
+        }
+        else if (regset == HLSL_REGSET_SAMPLERS)
+        {
+            reg->type = VKD3DSPR_SAMPLER;
+            reg->dimension = VSIR_DIMENSION_NONE;
+            if (vkd3d_shader_ver_ge(version, 5, 1))
+            {
+                reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
+                reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
+                reg->idx_count = 2;
+            }
+            else
+            {
+                reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index;
+                reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
+                reg->idx_count = 1;
+            }
+            VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS);
+            *writemask = VKD3DSP_WRITEMASK_ALL;
+        }
+        else
+        {
+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
+
+            VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
+            reg->type = VKD3DSPR_CONSTBUFFER;
+            reg->dimension = VSIR_DIMENSION_VEC4;
+            if (vkd3d_shader_ver_ge(version, 5, 1))
+            {
+                reg->idx[0].offset = var->buffer->reg.id;
+                reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
+                reg->idx[2].offset = offset / 4;
+                reg->idx_count = 3;
+            }
+            else
+            {
+                reg->idx[0].offset = var->buffer->reg.index;
+                reg->idx[1].offset = offset / 4;
+                reg->idx_count = 2;
+            }
+            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
+        }
+    }
+    else if (var->is_input_semantic)
+    {
+        bool has_idx;
+
+        if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx))
+        {
+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+
+            if (has_idx)
+            {
+                reg->idx[0].offset = var->semantic.index + offset / 4;
+                reg->idx_count = 1;
+            }
+
+            if (shader_sm4_is_scalar_register(reg))
+                reg->dimension = VSIR_DIMENSION_SCALAR;
+            else
+                reg->dimension = VSIR_DIMENSION_VEC4;
+            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
+        }
+        else
+        {
+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+            VKD3D_ASSERT(hlsl_reg.allocated);
+
+            if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
+                reg->type = VKD3DSPR_PATCHCONST;
+            else
+                reg->type = VKD3DSPR_INPUT;
+            reg->dimension = VSIR_DIMENSION_VEC4;
+            reg->idx[0].offset = hlsl_reg.id;
+            reg->idx_count = 1;
+            *writemask = hlsl_reg.writemask;
+        }
+    }
+    else if (var->is_output_semantic)
+    {
+        bool has_idx;
+
+        if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx))
+        {
+            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+
+            if (has_idx)
+            {
+                reg->idx[0].offset = var->semantic.index + offset / 4;
+                reg->idx_count = 1;
+            }
+
+            if (shader_sm4_is_scalar_register(reg))
+                reg->dimension = VSIR_DIMENSION_SCALAR;
+            else
+                reg->dimension = VSIR_DIMENSION_VEC4;
+            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
+        }
+        else
+        {
+            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+
+            VKD3D_ASSERT(hlsl_reg.allocated);
+            reg->type = VKD3DSPR_OUTPUT;
+            reg->dimension = VSIR_DIMENSION_VEC4;
+            reg->idx[0].offset = hlsl_reg.id;
+            reg->idx_count = 1;
+            *writemask = hlsl_reg.writemask;
+        }
+    }
+    else
+    {
+        return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref);
+    }
+    return true;
+}
+
+static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
+        struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref,
+        const struct vkd3d_shader_location *loc, unsigned int writemask)
+{
+    uint32_t reg_writemask;
+
+    if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref))
+        return false;
+    dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask);
+    return true;
+}
+
 static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
         struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
 {
@@ -8505,6 +8730,28 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
     }
 }
 
+static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx,
+        struct vsir_program *program, struct hlsl_ir_store *store)
+{
+    struct hlsl_ir_node *instr = &store->node;
+    struct vkd3d_shader_dst_param *dst_param;
+    struct vkd3d_shader_src_param *src_param;
+    struct vkd3d_shader_instruction *ins;
+
+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
+        return false;
+
+    dst_param = &ins->dst[0];
+    if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
+            dst_param, &store->lhs, &instr->loc, store->writemask))
+        return false;
+
+    src_param = &ins->src[0];
+    vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask);
+
+    return true;
+}
+
 static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
 {
     struct vkd3d_string_buffer *dst_type_string;
@@ -8550,6 +8797,11 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
                 sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program);
                 break;
 
+            case HLSL_IR_STORE:
+                if (sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)))
+                    replace_instr_with_last_vsir_instr(ctx, program, instr);
+                break;
+
             case HLSL_IR_SWITCH:
                 LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry)
                     sm4_generate_vsir_block(ctx, &c->body, program);
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index ed94d77d..b29ad758 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -5528,25 +5528,6 @@ static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struc
     write_sm4_instruction(tpf, &instr);
 }
 
-static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store)
-{
-    const struct hlsl_ir_node *rhs = store->rhs.node;
-    struct sm4_instruction instr;
-    uint32_t writemask;
-
-    memset(&instr, 0, sizeof(instr));
-    instr.opcode = VKD3D_SM4_OP_MOV;
-
-    sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr);
-    instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask);
-    instr.dst_count = 1;
-
-    sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask);
-    instr.src_count = 1;
-
-    write_sm4_instruction(tpf, &instr);
-}
-
 static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s)
 {
     const struct hlsl_ir_node *selector = s->selector.node;
@@ -5804,10 +5785,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b
                 write_sm4_loop(tpf, hlsl_ir_loop(instr));
                 break;
 
-            case HLSL_IR_STORE:
-                write_sm4_store(tpf, hlsl_ir_store(instr));
-                break;
-
             case HLSL_IR_SWITCH:
                 write_sm4_switch(tpf, hlsl_ir_switch(instr));
                 break;
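
For reference, a small standalone sketch (illustrative only, not part of the patch)
of the writemask plumbing the new path relies on: sm4_generate_vsir_init_dst_param_from_deref()
filters the register writemask derived from the deref through the store's own
writemask via hlsl_combine_writemasks(). The helper below mirrors the behaviour of
vkd3d's hlsl_combine_writemasks(); the masks in main() are made-up values chosen
for illustration.

    #include <assert.h>
    #include <stdio.h>

    /* Mirrors vkd3d's hlsl_combine_writemasks(): the i-th set bit of "first"
     * survives iff bit j of "second" is set, where j is the index of that bit
     * among first's set bits. */
    static unsigned int combine_writemasks(unsigned int first, unsigned int second)
    {
        unsigned int ret = 0, i, j = 0;

        for (i = 0; i < 4; ++i)
        {
            if (first & (1u << i))
            {
                if (second & (1u << j++))
                    ret |= 1u << i;
            }
        }

        return ret;
    }

    int main(void)
    {
        /* A variable allocated to r0.yz (register writemask 0x6) whose second
         * component is stored (store->writemask 0x2): the emitted vsir MOV
         * must then write only r0.z (mask 0x4). */
        assert(combine_writemasks(0x6, 0x2) == 0x4);
        printf("combined writemask: %#x\n", combine_writemasks(0x6, 0x2));
        return 0;
    }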